SDG14

Author

Ignacio Saldivia Gonzatti

Published

February 7, 2023

Import packages and set things up

Code
import numpy as np
import pandas as pd
import os
import csv
import re
import composite as ci
import inspect
from functools import reduce
import plotly.express as px
from sklearn.preprocessing import RobustScaler

# from xml.etree import ElementTree as ET
# import requests
# import json
# import webbrowser
Code
# for VSC users, Latex in Plotly is not working
# Workaround from https://github.com/microsoft/vscode-jupyter/issues/8131
# for VSC users, Latex in Plotly is not working
# Workaround from https://github.com/microsoft/vscode-jupyter/issues/8131
import plotly
from IPython.display import display, HTML

plotly.offline.init_notebook_mode()

# Inject the MathJax loader so Plotly can render LaTeX labels inside VS Code.
mathjax_tag = '<script type="text/javascript" async src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.1/MathJax.js?config=TeX-MML-AM_SVG"></script>'
display(HTML(mathjax_tag))
Code
# Show the result of every expression in a cell, not only the last one.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"  # IPython default is "last_expr"
# Show all DataFrame columns when displaying tables.
pd.set_option("display.max_columns", None)
Code
# Create the output folder if it is not there already.
# exist_ok=True avoids the race between the existence check and the creation
# (the old `if not os.path.exists(...)` could still raise FileExistsError).
os.makedirs("../output", exist_ok=True)

Country names dict

Code
# Load the country-name -> abbreviation lookup table.
country_to_abbrev = (
    pd.read_csv("../data/countryAbbr.csv", header=None, index_col=0).squeeze().to_dict()
)
# Invert it (abbreviation -> country name) with a dict comprehension.
abbrev_to_country = {abbr: name for name, abbr in country_to_abbrev.items()}
# Extra codes used by Eurostat that are missing from the source file.
abbrev_to_country["EL"] = "Greece"
abbrev_to_country["GB"] = "United Kingdom"

# Full list of European country names.
allEurope = pd.read_csv("../data/Countries-Europe.csv").name.to_list()

# fmt: off
# countries included in the assessment
countries = [
    'Belgium','Germany','Denmark','Estonia','Spain','Finland','France',
'Ireland','Lithuania','Latvia','Netherlands','Poland','Portugal','Sweden',
'United Kingdom'
]
# countries used to calculate some variable targets (EU + UK)
countriesEU = [
    "Belgium", "Bulgaria", "Cyprus", "Greece", "Germany", "Croatia",
    "Italy", "Denmark", "Estonia", "Spain", "Finland", "France",
    "Ireland", "Lithuania", "Latvia", "Malta", "Netherlands", "Poland",
    "Portugal", "Romania", "Sweden", "United Kingdom",]
# Abbreviations for the assessed countries; a name is kept as-is when it has
# no entry in country_to_abbrev.
countryAbb = [
    x if x not in country_to_abbrev else country_to_abbrev[x] for x in countries
]
# fmt: on

Define general functions

Code
def missingCountries(df, countries=countries):
    """Print which of the given countries are absent from df's index."""
    absent = [c for c in countries if c not in df.index.unique()]
    if absent:
        print("Missing countries:\n", "\n".join(absent))
    else:
        print("No missing countries")

def negativeValues(ds):
    """Replace negative values in ds with 0, in place.

    ds: pandas Series (or DataFrame) of numeric observations.
    Mutates the caller's data and prints a notice when anything was replaced.
    NOTE(review): callers pass a column sliced from a DataFrame
    (e.g. `nitro.OBS_VALUE`); this relies on that slice being a view —
    confirm there is no SettingWithCopy issue under Copy-on-Write pandas.
    """
    # (ds < 0) is False for NaN, so missing data is left untouched —
    # same semantics as the previous `ds[ds < 0].count().sum() > 0` check,
    # but a single vectorized reduction that works for Series and DataFrame.
    if (ds < 0).to_numpy().any():
        ds[ds < 0] = 0
        print("Negative values in dataset replaced with 0")

def calculate_target(df, valueCol, countryCol):
    """
    Calculate target values (max and min) for an indicator.

    Rows outside the Interquartile Range (IQR) proximity rule are dropped
    first; then the per-country max/min are computed and the targets are the
    mean of the 3 largest maxima and the mean of the 3 smallest minima.

    df: dataframe
    valueCol: column name of value
    countryCol: column name of country
    Returns (avgMax3, avgMin3).
    """
    # IQR proximity rule bounds.
    quartile_1, quartile_3 = np.percentile(df[valueCol], [25, 75])
    iqr = quartile_3 - quartile_1
    lower_bound = quartile_1 - 1.5 * iqr
    upper_bound = quartile_3 + 1.5 * iqr
    # Keep only in-range rows. The previous expression
    # `~(v > upper) | (v < lower)` applied `~` to the first comparison only,
    # which reduces to `v <= upper`: low outliers were never removed and
    # could drag avgMin3 far below any real observation.
    inRange = (df[valueCol] >= lower_bound) & (df[valueCol] <= upper_bound)
    dfOutliers = df[inRange]
    # Target is the mean of the 3 best per-country extremes.
    avgMax3 = (
        dfOutliers[valueCol].groupby(dfOutliers[countryCol]).max().nlargest(3).mean()
    )
    avgMin3 = (
        dfOutliers[valueCol].groupby(dfOutliers[countryCol]).min().nsmallest(3).mean()
    )
    return avgMax3, avgMin3

SDG Official Data

SDG14 metadata can be found here

Code
# SDG14 indicators from the UNstats
# https://unstats.un.org/sdgs/dataportal/database

sdg14 = pd.read_excel("../data/Goal14_april2023.xlsx", sheet_name=0)
# fmt: off
sdg14 = sdg14[
    [ 
        "Target", "Indicator", "SeriesCode", "GeoAreaName", 
        "TimePeriod", "Value", "Units", "SeriesDescription" 
    ]
]
# fmt: on

# "N" flags missing observations in the source sheet.
# Assign the column back rather than calling `.replace(..., inplace=True)`
# on the attribute-accessed column: that chained-inplace form is deprecated
# in pandas and can silently operate on a temporary copy.
sdg14["Value"] = sdg14["Value"].replace("N", np.nan).astype(np.float64)

# filter countries
# sdg14 = sdg14[sdg14['GeoAreaName'].isin(countries)]
# show indicators
pd.DataFrame(
    sdg14.groupby(["Indicator", "SeriesCode", "SeriesDescription", "Units"]).size()
)
0
Indicator SeriesCode SeriesDescription Units
14.1.1 EN_MAR_BEALITSQ Beach litter per square kilometer (Number) NUMBER 611
EN_MAR_BEALIT_BP Beach litter originating from national land-based sources that ends in the beach (%) PERCENT 900
EN_MAR_BEALIT_BV Beach litter originating from national land-based sources that ends in the beach (Tonnes) TONNES 900
EN_MAR_BEALIT_EXP Exported beach litter originating from national land-based sources (Tonnes) TONNES 900
EN_MAR_BEALIT_OP Beach litter originating from national land-based sources that ends in the ocean (%) PERCENT 900
EN_MAR_BEALIT_OV Beach litter originating from national land-based sources that ends in the ocean (Tonnes) TONNES 900
EN_MAR_CHLANM Chlorophyll-a anomaly, remote sensing (%) PERCENT 2784
EN_MAR_CHLDEV Chlorophyll-a deviations, remote sensing (%) PERCENT 4131
EN_MAR_PLASDD Floating plastic debris density (count per km2) NUMBER 3
14.2.1 EN_SCP_ECSYBA Number of countries using ecosystem-based approaches to managing marine areas (1 = YES; 0 = NO) NUMBER 33
14.3.1 ER_OAW_MNACD Average marine acidity (pH) measured at agreed suite of representative sampling stations PH 903
14.4.1 ER_H2O_FWTL Proportion of fish stocks within biologically sustainable levels (not overexploited) (%) PERCENT 422
14.5.1 ER_MRN_MPA Average proportion of Marine Key Biodiversity Areas (KBAs) covered by protected areas (%) PERCENT 6049
14.6.1 ER_REG_UNFCIM Progress by countries in the degree of implementation of international instruments aiming to combat illegal, unreported and unregulated fishing (level of implementation: 1 lowest to 5 highest) NUMBER 882
14.7.1 EN_SCP_FSHGDP Sustainable fisheries as a proportion of GDP PERCENT 5880
14.a.1 ER_RDE_OSEX National ocean science expenditure as a share of total research and development funding (%) PERCENT 159
14.b.1 ER_REG_SSFRAR Degree of application of a legal/regulatory/policy/institutional framework which recognizes and protects access rights for small-scale fisheries (level of implementation: 1 lowest to 5 highest) NUMBER 882
14.c.1 ER_UNCLOS_IMPLE Score for the implementation of UNCLOS and its two implementing agreements (%) PERCENT 63
ER_UNCLOS_RATACC Score for the ratification of and accession to UNCLOS and its two implementing agreements (%) PERCENT 63

Check official data by indicator

Code
# check sdg indicator, change SeriesCode to the one you want to check
# Example with Average marine acidity (pH) ER_OAW_MNACD
# Normalise the various "United Kingdom of ..." spellings to a single label.
sdg14.loc[
    sdg14["GeoAreaName"].str.contains(r"^(?=.*United)(?=.*Kingdom)"), "GeoAreaName"
] = "United Kingdom"
sdg14check = sdg14[sdg14["GeoAreaName"].isin(countries)]
# One row per country, one column per year (mean over duplicate records).
sdg14check = sdg14check[sdg14check["SeriesCode"] == "ER_OAW_MNACD"].pivot_table(
    columns="TimePeriod", index="GeoAreaName", values="Value", aggfunc="mean"
)
# Display the 3 most recent years, then report countries with no data.
mr = sdg14check.columns[-3:].to_list()
sdg14check[mr]
missingCountries(sdg14check)
TimePeriod 2019 2020 2021
GeoAreaName
Belgium 7.794000 8.139600 7.8710
Estonia 7.783000 7.916000 7.7255
Finland 8.223464 NaN NaN
France 8.023000 NaN NaN
Netherlands 7.904056 NaN NaN
Spain 8.036000 NaN NaN
Sweden 8.082261 8.117167 NaN
United Kingdom 7.988000 NaN NaN
Missing countries:
 Germany
Denmark
Ireland
Lithuania
Latvia
Poland
Portugal

14.1

(a) Index of coastal eutrophication

We use Gross Nitrogen Balance: Max-Min transformation where the max-value is the maximum value in the period 2012-2021 and the min-value is zero.

Gross nutrient balance (Nitrogen kg/ha), Eurostat

Source

Code
# Gross nutrient balance (BAL_UAA, Nitrogen kg/ha), Eurostat
# https://ec.europa.eu/eurostat/databrowser/view/AEI_PR_GNB__custom_153613/

nitro = pd.read_csv("../data/aei_pr_gnb_page_linear.csv")
# Eurostat codes -> full country names (unknown codes kept as-is).
nitro["geo"] = nitro["geo"].map(abbrev_to_country).fillna(nitro["geo"])
years = range(2012,nitro.TIME_PERIOD.max()+1)
# We use all countries in the EU to calculate the target value.
nitro = nitro[(nitro["geo"].isin(countriesEU)) & (nitro["TIME_PERIOD"].isin(years))]
# Countries can have nutrient deficiency (negative value) indicating declining soil fertility.
# NOTE(review): mutates the OBS_VALUE column in place through a Series view —
# confirm no SettingWithCopy issue after the filtering above.
negativeValues(nitro.OBS_VALUE)
# For eutrophication, the less nutrient balance the better.
target = min(calculate_target(nitro, 'OBS_VALUE', 'geo'))
# Score = target/value, capped to 0-100; zero observations divide to inf and
# are scored 100 (best).
nitro['target'] = (target/nitro.OBS_VALUE * 100).replace([np.inf, -np.inf], 100).clip(0,100)

years = [2012,2016,nitro.TIME_PERIOD.max()]
nitro = nitro.pivot_table(
    columns="TIME_PERIOD", index="geo", values="target", aggfunc="mean"
)
# Carry the last available score forward into years with no data.
nitro.ffill(axis=1, inplace=True)
nitro[years][nitro.index.isin(countries)]

missingCountries(nitro)
Negative values in dataset replaced with 0
No missing countries
TIME_PERIOD 2012 2016 2019
geo
Belgium 6.568833 7.121212 7.121212
Denmark 11.270983 11.750000 11.750000
Estonia 33.571429 33.571429 33.571429
Finland 19.789474 19.831224 21.510297
France 40.000000 19.789474 24.736842
Germany 12.516644 13.613324 17.602996
Ireland 26.780627 17.375231 15.088283
Latvia 39.166667 37.007874 75.806452
Lithuania 32.191781 37.600000 23.039216
Netherlands 5.529412 4.832905 5.669481
Poland 19.542620 21.315193 19.831224
Portugal 21.412301 20.128480 20.796460
Spain 27.976190 24.102564 19.066937
Sweden 30.128205 25.613079 36.015326
United Kingdom 10.742857 10.867052 10.917538
Marine waters affected by eutrophication

Source

Code
# Marine waters affected by eutrophication (km2 and % EEZ), Eurostat
# https://ec.europa.eu/eurostat/databrowser/view/sdg_14_60/default/table?lang=en

eutro = pd.read_csv("../data/sdg_14_60_linear.csv")
# Eurostat codes -> full country names (unknown codes kept as-is).
eutro["geo"] = eutro["geo"].map(abbrev_to_country).fillna(eutro["geo"])
years = range(2012,eutro.TIME_PERIOD.max()+1)
eutro = eutro[eutro["geo"].isin(countriesEU) & (eutro["unit"] == "PC") & (eutro['TIME_PERIOD'].isin(years))]  # KM2 for area
# The less eutrophication the better. We ignore 0 values to calculate meaningful target
target = min(calculate_target(eutro[eutro.OBS_VALUE!=0] , 'OBS_VALUE', 'geo'))
# Score = target/value capped to 0-100; zero-value rows divide to inf -> 100.
eutro['target'] = (target/eutro.OBS_VALUE * 100).replace([np.inf, -np.inf], 100).clip(0,100)
years = [2012,2016,eutro.TIME_PERIOD.max()]
eutro = eutro.pivot_table(
    columns="TIME_PERIOD", index="geo", values="target", aggfunc="mean"
)
# UK missing. We will use the average of the other countries each year
eutro.loc["United Kingdom"] = eutro.mean()
eutro[years][eutro.index.isin(countries)]
missingCountries(eutro)
TIME_PERIOD 2012 2016 2022
geo
Belgium 100.000000 100.000000 100.000000
Denmark 100.000000 50.000000 0.409836
Estonia 3.703704 5.555556 100.000000
Finland 3.703704 2.702703 0.781250
France 100.000000 33.333333 2.631579
Germany 100.000000 100.000000 50.000000
Ireland 100.000000 100.000000 16.666667
Latvia 11.111111 7.692308 100.000000
Lithuania 50.000000 5.000000 100.000000
Netherlands 100.000000 100.000000 5.555556
Poland 25.000000 10.000000 100.000000
Portugal 1.923077 3.571429 0.523560
Spain 5.882353 5.555556 1.666667
Sweden 14.285714 4.545455 0.149925
United Kingdom 67.409984 43.100992 56.113573
No missing countries

(b) Floating Plastic Debris Density

We use two indicators:

  1. Plastic Waste kg/ha: Max-Min Transformation where the max-value and min-values are the maximum and minimum in the period 2012-2021.

  2. Recovery Rate of Plastic Packaging: Without further transformation as score is provided dimensionless.

1. Plastic Waste kg/ha, Eurostat

Source

Code
# Plastic Waste kg/hab, Eurostat
# https://ec.europa.eu/eurostat/databrowser/view/ENV_WASGEN/

wasteG = pd.read_csv("../data/env_wasgen.csv")
years = range(2012,wasteG.TIME_PERIOD.max()+1)
# Eurostat codes -> full country names (unknown codes kept as-is).
wasteG["geo"] = wasteG["geo"].map(abbrev_to_country).fillna(wasteG["geo"])
wasteG = wasteG[wasteG["geo"].isin(countriesEU) & wasteG["TIME_PERIOD"].isin(years) & wasteG["unit"].isin(["KG_HAB"])]
# The less plastic waste, the better.
target = min(calculate_target(wasteG, 'OBS_VALUE', 'geo'))
# Score = target/value capped to 0-100; zero observations map to inf -> 100.
wasteG['target'] = (target/wasteG.OBS_VALUE * 100).replace([np.inf, -np.inf], 100).clip(0,100)
years = [2012,2016,wasteG.TIME_PERIOD.max()]
wasteG = wasteG.pivot_table(
    columns="TIME_PERIOD", index="geo", values="target", aggfunc="mean"
)
# fill UK 2020 with 2019 value
wasteG = wasteG.ffill(axis=1)
wasteG[years][wasteG.index.isin(countries)]
missingCountries(wasteG)
TIME_PERIOD 2012 2016 2020
geo
Belgium 13.559322 12.698413 9.302326
Denmark 42.105263 40.000000 33.333333
Estonia 47.058824 24.242424 25.000000
Finland 47.058824 50.000000 33.333333
France 32.000000 27.586207 22.222222
Germany 25.806452 24.242424 21.621622
Ireland 30.769231 25.806452 20.000000
Latvia 72.727273 25.000000 13.559322
Lithuania 47.058824 25.806452 20.000000
Netherlands 23.529412 25.806452 21.621622
Poland 32.000000 23.529412 13.559322
Portugal 47.058824 26.666667 18.604651
Spain 33.333333 50.000000 42.105263
Sweden 44.444444 25.000000 24.242424
United Kingdom 21.621622 17.777778 18.181818
No missing countries
2. Recovery rates for packaging waste, Plastic Packaging

Source

Code
# Recovery rates for packaging waste, Plastic Packaging, Percentage, Eurostat
# https://ec.europa.eu/eurostat/databrowser/view/ten00062/

wasteR = pd.read_csv("../data/ten00062.csv")
years = range(2012,wasteR.TIME_PERIOD.max()+1)
# Eurostat codes -> full country names (unknown codes kept as-is).
wasteR["geo"] = wasteR["geo"].map(abbrev_to_country).fillna(wasteR["geo"])
wasteR = wasteR[wasteR["geo"].isin(countriesEU) & wasteR["TIME_PERIOD"].isin(years)]
# The more recovery, the better.
target = max(calculate_target(wasteR, 'OBS_VALUE', 'geo'))
# Score = value/target (ratio inverted relative to the "less is better" cells).
wasteR['target'] = (wasteR.OBS_VALUE/target * 100).replace([np.inf, -np.inf], 100).clip(0,100)
years = [2012,2016,wasteR.TIME_PERIOD.max()]
wasteR = wasteR.pivot_table(
    columns="TIME_PERIOD", index="geo", values="target", aggfunc="mean"
)
# fill empty values with last available year
wasteR = wasteR.ffill(axis=1)
wasteR[years][wasteR.index.isin(countries)]
missingCountries(wasteR)
TIME_PERIOD 2012 2016 2020
geo
Belgium 92.885772 99.699399 99.599198
Denmark 99.599198 98.296593 73.246493
Estonia 44.088176 85.971944 87.575150
Finland 51.102204 97.394790 99.599198
France 64.128257 64.629259 71.943888
Germany 99.899800 100.000000 100.000000
Ireland 74.448898 79.859719 100.000000
Latvia 38.777555 41.883768 47.995992
Lithuania 38.977956 74.549098 63.527054
Netherlands 98.296593 95.991984 95.090180
Poland 26.252505 54.909820 45.090180
Portugal 39.478958 50.000000 57.014028
Spain 53.306613 61.923848 55.611222
Sweden 58.216433 61.823647 50.801603
United Kingdom 38.176353 58.617234 56.312625
No missing countries

14.2

Proportion of national exclusive economic zones managed using ecosystem-based approaches

We use two indicators:

  1. Progress of implementation of Maritime Spatial Planning. Categorical data

  2. OHI Biodiversity: measures the status of marine habitats that support large numbers of marine species.

1. Progress of implementation of Maritime Spatial Planning

Source

Code
msp = pd.read_csv("../data/mspData.csv")
# Ordinal scores for Maritime Spatial Planning implementation status
# (higher = further along).
dictScore = {
    "implemented": 4,
    "complete not implemented": 3,
    "under development": 2,
    "not started": 1,
}
Code
# Map status labels to ordinal scores for the three reference years.
mspVal = (
    msp[["Country", "2012", "2016", "2022"]]
    .set_index("Country")
    .replace(dictScore)
    # .melt(ignore_index=False)
    # .rename(columns={"variable": "Year", "value": "Score"})
    # .set_index("Year", append=True)
    # .sort_index()
)
# Rescale the 1-4 scores onto a 25-100 range.
mspVal = mspVal * 1 / len(dictScore) * 100
mspVal
missingCountries(mspVal)
2012 2016 2022
Country
Belgium 50.0 100.0 100.0
Germany 100.0 100.0 100.0
Denmark 25.0 25.0 100.0
Estonia 25.0 50.0 100.0
Spain 25.0 25.0 75.0
Finland 25.0 25.0 100.0
France 25.0 25.0 100.0
Ireland 25.0 25.0 100.0
Lithuania 50.0 75.0 100.0
Latvia 50.0 50.0 100.0
Netherlands 100.0 100.0 100.0
Poland 25.0 50.0 100.0
Portugal 25.0 50.0 100.0
Sweden 25.0 50.0 100.0
United Kingdom 25.0 50.0 100.0
No missing countries
2. OHI Biodiversity

Source

Code
# OHI 'Biodiversity' Index
# https://oceanhealthindex.org/global-scores/data-download/

ohiBio = pd.read_csv("../data/scoresOHI.csv")
ohiBio = ohiBio[ohiBio["region_name"].isin(countries)]
# Keep only the "status" dimension of the Biodiversity goal.
ohiBio = ohiBio[
    (ohiBio.long_goal == "Biodiversity") & (ohiBio.dimension == "status")
]
# One row per country, one column per scenario year.
ohiBio = ohiBio.pivot_table(
    columns="scenario", index="region_name", values="value", aggfunc="mean"
)

ohiBio[[2012, 2018, 2022]]
missingCountries(ohiBio)
scenario 2012 2018 2022
region_name
Belgium 75.28 71.91 71.84
Denmark 74.24 72.71 72.39
Estonia 82.49 76.08 76.10
Finland 81.00 74.08 73.82
France 72.60 72.86 74.12
Germany 75.06 71.70 71.52
Ireland 69.19 66.90 66.23
Latvia 78.47 71.93 72.59
Lithuania 77.68 71.12 71.80
Netherlands 68.51 67.19 67.97
Poland 69.42 67.01 66.59
Portugal 75.57 72.75 71.87
Spain 72.16 69.23 68.17
Sweden 76.72 73.50 72.54
United Kingdom 69.87 72.81 72.11
No missing countries

14.3

Average marine acidity (pH) measured at agreed suite of representative sampling stations

We use two indicators:

  1. Reconstructed sea surface pH: Distance to Reference transformation where the target value is the pre-industrial pH level of 8.2

  2. Carbon Emissions per capita: Max-Min Transformation where the max-value and min-values are the maximum and minimum in the period 2012-2021.

Reconstructed estimates of sea surface pH assessed with GLODAPv2.2021

Source Copernicus

Code
# ph Calculations in pHCountryLevel.ipynb file
phCopernicus = pd.read_csv("../data/phCopernicus.csv", index_col=0)
# We set target pH as pre-industrial level https://www.eea.europa.eu/ims/ocean-acidification
target = 8.2
# Score each country-year as a percentage of the pre-industrial target.
phCopernicus = phCopernicus/target * 100
phCopernicus
missingCountries(phCopernicus)
2012 2016 2021
Belgium 98.416817 98.372463 98.339183
Germany 98.233476 98.223317 98.103500
Denmark 98.363012 98.343317 98.238829
Estonia 97.613476 98.026768 97.609902
Spain 98.562378 98.445439 98.370024
Finland 97.765500 97.981232 97.717159
France 98.629780 98.511951 98.424024
Ireland 98.740963 98.616902 98.506000
Lithuania 96.980007 97.417059 97.082190
Latvia 97.096866 97.692573 97.198626
Netherlands 98.596902 98.471293 98.394500
Poland 96.469329 97.056435 96.550528
Portugal 98.583268 98.497232 98.378049
Sweden 97.437732 97.776390 97.489500
United Kingdom 98.674927 98.567567 98.460122
No missing countries

2. Carbon emissions per capita

Several sources (see code)

Code
# Population on 1 January, Eurostat
# https://ec.europa.eu/eurostat/databrowser/view/tps00001/

pop = pd.read_csv("../data/tps00001.csv")
# Eurostat codes -> full country names (unknown codes kept as-is).
pop["geo"] = pop["geo"].map(abbrev_to_country).fillna(pop["geo"])
pop = pop[pop["geo"].isin(countriesEU)]
pop.rename(columns={"OBS_VALUE": "population"}, inplace=True)
Code
# CO2, TOTX4_MEMONIA, THS_T thousand tonnes, Eurostat
# https://ec.europa.eu/eurostat/databrowser/view/ENV_AIR_GGE/

co2 = pd.read_csv("../data/env_air_gge.csv")
# Eurostat codes -> full country names (unknown codes kept as-is).
co2["geo"] = co2["geo"].map(abbrev_to_country).fillna(co2["geo"])
co2 = co2[
    co2["geo"].isin(countriesEU)
    & (co2.airpol == "CO2")
    & (co2.src_crf == "TOTX4_MEMONIA")
]

# Data to include UK. Data for other countries is almost the same as in the previous dataset
# Fossil CO2 emissions by country (territorial), million tonnes of carbon per year (1MtC = 1 million tonne of carbon = 3.664 million tonnes of CO2)
# https://globalcarbonbudget.org/carbonbudget/

co2GCB = pd.read_excel(
    "../data/National_Fossil_Carbon_Emissions_2022v1.0.xlsx", sheet_name=1, skiprows=11, index_col=0
)
# convert from carbon to co2 and from Mt to thousand tonnes
co2GCB = co2GCB * 3.664 * 1_000 
# Reshape the UK column into the Eurostat-style long format before appending.
co2GCB_UK = pd.DataFrame(co2GCB['United Kingdom']).reset_index().rename(columns={'index':'TIME_PERIOD', 'United Kingdom':'OBS_VALUE'})
co2GCB_UK['geo'] = 'United Kingdom'
co2 = pd.concat([co2, co2GCB_UK], ignore_index=True)
# thousand tonnes co2 to co2 per capita
co2 = pd.merge(co2, pop, on=['geo', 'TIME_PERIOD'], how='left')
co2['co2pc'] = co2['OBS_VALUE'] * 1_000 / co2['population']

years = range(2012,co2.TIME_PERIOD.max()+1)
co2 = co2[co2.TIME_PERIOD.isin(years)]
# The less co2pc, the better.
target = min(calculate_target(co2, 'co2pc', 'geo'))
# Score = target/co2pc capped to 0-100.
co2['target'] = (target/co2.co2pc * 100).replace([np.inf, -np.inf], 100).clip(0,100)
years = [2012,2016,co2.TIME_PERIOD.max()]
co2 = co2.pivot_table(
    columns="TIME_PERIOD", index="geo", values="target", aggfunc="mean"
)
# fill UK 2021 with closest value
co2 = co2.ffill(axis=1)
co2[years][co2.index.isin(countries)]
missingCountries(co2)
TIME_PERIOD 2012 2016 2021
geo
Belgium 37.734121 39.459259 41.833681
Denmark 47.376240 51.718478 68.103705
Estonia 26.783744 27.085479 45.744259
Finland 36.906728 40.406167 51.708163
France 61.806654 66.847742 75.572829
Germany 34.783657 35.997628 43.288290
Ireland 41.662118 39.922992 46.737265
Latvia 93.965364 93.972191 91.681403
Lithuania 76.334698 77.951383 72.164336
Netherlands 34.459622 34.839212 42.925952
Poland 42.102450 42.241700 41.185795
Portugal 72.420665 69.650043 88.751684
Spain 58.295610 61.088196 72.095049
Sweden 70.390420 77.794760 95.307931
United Kingdom 47.256895 59.384996 74.533451
No missing countries

14.4

Proportion of fish stocks within biologically sustainable levels

We use two indicators:

  1. FMSY/F: catch-weighted average

  2. B/BMSY: catch-weighted average

1. FMSY/F

Data compiled partially manually

Source of FMSY and F is /stockAssesmentYEAR

Source of country catches is /OfficialNominalCatches and stockAssessment[“year”] PDFs.

Code
# FMSY/F ratios compiled from ICES stock assessments (two-level header:
# variable group, then year).
fmsyF = pd.read_csv("../data/ices_data.csv", index_col=0, header=[0, 1])
# Express the ratio as a percentage.
fmsyF = fmsyF[["Fref/F"]] * 100
fmsyF = fmsyF.droplevel(0, axis=1)
# most recent assessment is for 2022
fmsyF.rename(columns={"most recent": 2022}, inplace=True)
fmsyF = fmsyF[fmsyF.index.isin(countries)]
fmsyF
missingCountries(fmsyF)
Year 2012 2016 2022
Belgium 84.278853 89.253671 97.182818
Estonia 95.038299 82.573446 77.562646
Finland 99.177365 91.229736 82.840916
France 91.013767 93.548510 93.938257
Germany 81.283936 87.261924 90.831432
Ireland 71.053790 62.685203 86.770330
Latvia 94.318542 89.406109 81.381134
Lithuania 91.787307 83.126001 82.901621
Netherlands 85.883804 88.278969 93.834910
Poland 89.263580 89.580011 74.853470
Portugal 78.732711 84.161276 94.842807
Spain 92.064992 88.531210 91.937691
Sweden 91.475417 78.225767 74.962550
United Kingdom 98.135098 95.872798 90.761277
Denmark 95.419499 85.849679 88.529445
No missing countries

2. B/BMSY

Data collected partially manually

Source of BMSY and B is /stockAssesmentYEAR

Source of country catches is /OfficialNominalCatches and stockAssessment[“year”] PDFs.

Code
# B/BMSY ratios from the same ICES data file, expressed as a percentage.
bBmsy = pd.read_csv("../data/ices_data.csv", index_col=0, header=[0, 1])
bBmsy = bBmsy[["B/Bref"]] * 100
bBmsy = bBmsy.droplevel(0, axis=1)
# The "most recent" assessment column corresponds to 2022.
bBmsy.rename(columns={"most recent": 2022}, inplace=True)
bBmsy = bBmsy[bBmsy.index.isin(countries)]
bBmsy
missingCountries(bBmsy)
Year 2012 2016 2022
Belgium 91.982255 94.662804 91.871386
Estonia 99.839423 99.933347 95.679809
Finland 99.843072 99.941251 95.909145
France 97.115681 96.936702 96.369335
Germany 90.088417 93.281519 93.439957
Ireland 92.130861 85.437192 89.825424
Latvia 97.564685 99.565412 97.360783
Lithuania 92.382483 99.191985 96.254817
Netherlands 92.736442 89.856357 96.706294
Poland 96.568884 98.135256 92.284320
Portugal 98.746282 97.786405 98.794184
Spain 99.443829 96.930800 96.504370
Sweden 96.430034 97.171833 91.413094
United Kingdom 96.715727 95.636901 97.260299
Denmark 96.031151 95.816545 95.730634
No missing countries

14.5

Coverage of protected areas in relation to marine area

We consider two indicators:

  1. Coverage of protected areas in relation to marine areas: Distance to Reference transformation where the max-value is set to 30% and the min-value is 0%
  2. Average proportion of Marine Key Biodiversity Areas (KBAs) covered by protected areas: No further transformation

1. Marine protected areas (% of territorial waters)

Source

Code
# EEZ area
# Data: https://emodnet.ec.europa.eu/geonetwork/srv/eng/catalog.search#/metadata/d4a3fede-b0aa-485e-b4b2-77e8e3801fd0
# https://www.europarl.europa.eu/factsheets/en/sheet/100/outermost-regions-ors-
outermost = [
    # Portugal
    "Azores",
    "Madeira",
    # Spain
    "Canary Islands",
    # we could include outermost regions of France,
    # but we care about EEZs in the Atlantic, Baltic and North Sea
]
eez = pd.read_csv(
    "../data/EMODnet_EEZ_v11_20210506.csv",
    delimiter=";",
    encoding="latin-1",
)

eez = eez[eez["Territory"].isin(outermost + countries)]
# Any Country label containing the name (e.g. outermost-region variants)
# is collapsed to the bare country name before aggregating.
for country in ["France", "Portugal", "Spain"]:
    eez.loc[eez["Country"].str.contains(country), "Country"] = country
# NOTE(review): errors="ignore" for to_numeric is deprecated in recent
# pandas — consider converting the numeric columns explicitly.
eez = eez.apply(pd.to_numeric, errors="ignore")
# Total EEZ area per country.
eez = eez[["Country", "Area_km2"]].groupby(["Country"]).sum(numeric_only=True)
eez = eez[eez.index.isin(countries)]
eez
Area_km2
Country
Belgium 3495
Denmark 105021
Estonia 36451
Finland 81553
France 345240
Germany 56763
Ireland 427039
Latvia 28353
Lithuania 6832
Netherlands 64328
Poland 29982
Portugal 1728718
Spain 1007673
Sweden 155347
United Kingdom 731309
Code
# https://www.eea.europa.eu/data-and-maps/dashboards/natura-2000-barometer (in Barometer table)
# Data is divided by year
natura2k = pd.DataFrame()
for root, dirs, files in os.walk("../data/natura2k"):
    for file in files:
        if file.endswith(".csv"):
            tempDF = pd.read_csv(
                os.path.join(root, file), sep="\t", encoding="utf-16", header=1
            )
            # The year is encoded in the file name (raw string fixes the
            # invalid "\d" escape warning).
            tempDF["year"] = re.findall(r"\d+", file[:-4])[0]
            # Concatenate INSIDE the csv branch: the previous placement ran
            # for every file, re-appending the last csv's frame on non-csv
            # entries (duplicated rows) and raising NameError when a non-csv
            # file was encountered first.
            natura2k = pd.concat([natura2k, tempDF], axis=0).reset_index(drop=True)
# Keep only the marine-related columns plus the year tag.
natura2k = natura2k.loc[:, natura2k.columns.str.contains("marine|year|Unnamed")]
# The first data row holds the real column names; promote it to the header.
natura2k.columns = np.concatenate([natura2k.iloc[0, :-1], natura2k.columns[-1:]])
natura2k = natura2k.iloc[1:].reset_index(drop=True)
natura2k.columns = natura2k.columns.str.strip()
natura2k = natura2k.set_index("Country")
natura2k = natura2k[natura2k.index.isin(countries)]
# Join each country's Natura 2000 marine area with its EEZ area.
natura2kEEZ = natura2k.merge(eez, left_index=True, right_index=True)
natura2kEEZ = natura2kEEZ.apply(pd.to_numeric, errors="ignore")
natura2kEEZ["naturaEEZ"] = natura2kEEZ["Natura 2000"] / natura2kEEZ["Area_km2"] * 100
# target of 30% MPAs of EEZ
natura2kEEZ["Score"] = (100 * (natura2kEEZ.naturaEEZ) / 30).clip(upper=100)
natura2kEEZ = natura2kEEZ.pivot_table(index="Country", columns="year", values="Score")
# Use 2019 value for UK
natura2kEEZ.ffill(axis=1, inplace=True)
natura2kEEZ[[2012, 2016, 2021]]
missingCountries(natura2kEEZ)
year 2012 2016 2021
Country
Belgium 100.000000 100.000000 100.000000
Denmark 60.473620 60.473620 60.473620
Estonia 61.763280 61.763280 61.763280
Finland 29.183476 29.183476 33.278972
France 40.225157 40.247364 100.000000
Germany 100.000000 100.000000 100.000000
Ireland 5.360166 8.007856 8.005514
Latvia 20.315311 51.575965 51.587721
Lithuania 32.933255 76.258782 76.258782
Netherlands 61.139784 78.156738 85.271318
Poland 80.459387 80.448269 80.559447
Portugal 0.507891 6.148101 8.182171
Spain 3.515029 27.920433 28.009086
Sweden 20.023989 43.406052 43.485444
United Kingdom 33.704859 57.729359 60.225340
No missing countries

Average proportion of Marine Key Biodiversity Areas (KBAs) covered by protected areas (%)

Source: official SDG 14 data

Code
# Official SDG series ER_MRN_MPA: average proportion of Marine KBAs covered
# by protected areas (%), one row per country, one column per year.
sdg14check = sdg14[sdg14["GeoAreaName"].isin(countries)]
kbaMPA = sdg14check[sdg14check["SeriesCode"] == "ER_MRN_MPA"].pivot_table(
    columns="TimePeriod", index="GeoAreaName", values="Value", aggfunc="mean"
)
kbaMPA[[2012, 2016, 2022]]
missingCountries(kbaMPA)
TimePeriod 2012 2016 2022
GeoAreaName
Belgium 96.49980 96.84682 96.85317
Denmark 86.66340 86.74673 86.74673
Estonia 97.60969 97.60982 97.62768
Finland 59.78557 60.80889 60.85425
France 62.51162 76.43594 81.88238
Germany 74.06325 74.51850 80.79027
Ireland 75.92082 75.97923 83.16451
Latvia 96.16348 96.16360 96.16360
Lithuania 65.53137 83.51997 83.51997
Netherlands 93.31337 96.64630 96.64630
Poland 87.25113 87.25113 87.31556
Portugal 68.28585 70.43105 70.76779
Spain 62.51987 85.84322 85.86279
Sweden 56.67376 59.73273 60.46026
United Kingdom 80.23372 80.43096 81.23336
No missing countries

14.6

Degree of implementation of international instruments aiming to combat illegal, unreported and unregulated fishing

We use two indicators:

  1. Fishing Subsidies relative to Landings: Max-Min Transformation where the max-value and min-value are the maximum and minimum in the assessment period (2012-2021)
  2. TAC/Catch:

1. Fishing Subsidies relative to Landings

Source

Code
# selection of subsidies by level of risk to fishery sustainability
# as per https://stats.oecd.org/Index.aspx?QueryId=121718
# Highest-risk OECD FSE support categories (per the query above).
highRisk = [
    "I.E.1. Fuel tax concessions",
    "I.A.1. Transfers based on variable input use",
    "I.A.2.1.Support to vessel construction/purchase",
    "I.A.2.2.Support to modernisation",
    "I.A.2.3.Support to other fixed costs",
    "II.A. Access to other countries’ waters",
    "I.B.2. Special insurance system for fishers",
]

# Moderate-risk categories.
moderateRisk = [
    "I.B.1. Income support",
    "I.C. Transfers based on the reduction of productive capacity",
    "II.B.1. Capital expenditures",
    "II.B.2. Subsidized access to infrastructure",
]

# Categories whose risk classification is uncertain.
uncertainRisk = [
    "I.D. Miscellaneous direct support to individuals and companies",
    "I.E.2. Other tax exemptions",
    "II.D. Support to fishing communities",
    "II.C. Marketing and promotion",
    "II.E. Education and training",
    "II.F. Research and development",
    "II.H. Miscellaneous support for services to the sector",
]


# Categories classified as no-risk.
noRisk = [
    "II.G.1. Management expenditures",
    "II.G.2. Stock enhancement programs",
    "II.G.3. Enforcement expenditures",
]
Code
# Fisheries Support Estimate
# https://stats.oecd.org/Index.aspx?DataSetCode=FISH_FSE
# selection as per https://stats.oecd.org/Index.aspx?QueryId=121718

fse = pd.read_csv("../data/FISH_FSE.csv")
years = range(2012,fse.Year.max())
# we use US dollars because some countries use their own currency ('Danish Krone', 'Zloty', 'Swedish Krona')
fse = fse[(fse["Country"].isin(countriesEU)) & (fse["Measure"].isin(["US dollar"])) & (fse["Year"].isin(years))]

# strip variable codes and delete parent variables to avoid double counting
# solution from https://stackoverflow.com/q/76183612/14534411

separator = "."
fse["vCode"] = fse.Variable.str.rsplit(".", n=1).str.get(0)

variableAll = fse.vCode.unique().tolist()


def is_parent(p, target):
    """Return True when code *p* sits strictly below *target* in the
    dotted hierarchy, i.e. *p* is *target* followed by a '.' (and
    possibly more text)."""
    return p.startswith(target + ".")


# Walk the sorted code list and keep only leaf codes: a code is dropped
# when the next sorted entry is one of its children (the parent's total
# already includes the children, so keeping it would double-count).
vSupport = []
prev = ""
for s in sorted(variableAll) + [""]:
    if prev and not is_parent(s, prev):
        vSupport.append(prev)
    prev = s

fse = fse[(fse.vCode.isin(vSupport)) | (fse.vCode.isna())]

# We calculate the ratio of high-risk subsidies to total subsidies
fseRisk = fse[fse.Variable.isin(highRisk)]
fseRisk = fseRisk.groupby(["Country", "Year"]).sum(numeric_only=True)
fse = fse.groupby(["Country", "Year"]).sum(numeric_only=True)
fseRiskRatio = (fseRisk / fse * 100).reset_index()
# The less high-risk subsidies the better. We ignore 0 values to calculate meaningful target
target = min(calculate_target(fseRiskRatio[fseRiskRatio.Value!=0], 'Value', 'Country'))
# Score each observation against the target; a 0% high-risk share divides
# to inf, which is mapped to the maximum score of 100.
fseRiskRatio['target'] = (target/fseRiskRatio.Value * 100).replace([np.inf, -np.inf], 100).clip(0,100)
years = [2012,2016,fseRiskRatio.Year.max()]
# NOTE(review): "fseRiskkRatio" (double k) looks like a typo for
# fseRiskRatio; the name is kept unchanged because the lines below use it.
fseRiskkRatio = fseRiskRatio.pivot_table(
    columns="Year", index="Country", values="target", aggfunc="mean"
)
# fill missing values with nearest, then backfill and forward fill
fseRiskkRatio = (
    fseRiskkRatio.interpolate(method="nearest", axis=1, limit_direction="both")
    .bfill(axis=1)
    .ffill(axis=1)
)
# Finland is missing, we use the mean per year
fseRiskkRatio.loc["Finland"] = fseRiskkRatio.mean()

fseRiskkRatio[years][fseRiskkRatio.index.isin(countries)]
missingCountries(fseRiskkRatio)
Year 2012 2016 2019
Country
Belgium 1.139037 2.272171 2.298623
Denmark 0.698986 0.715540 0.772050
Estonia 7.391742 100.000000 3.199879
France 1.033066 67.197243 1.311843
Germany 12.965808 4.147636 12.577897
Ireland 0.430904 9.167110 21.919322
Latvia 3.871829 100.000000 100.000000
Lithuania 1.498189 11.784437 1.451992
Netherlands 9.666849 3.594301 1.575328
Poland 0.441716 0.280389 0.356007
Portugal 1.130799 0.323543 0.479279
Spain 10.216785 3.002641 4.131087
Sweden 0.716534 0.632158 0.989415
United Kingdom 3.810226 4.267987 2.925930
Finland 10.202035 20.662909 10.199273
No missing countries

3. IUU Fishing Index

Source

Code
# The nine IUU Fishing Index governance indicators kept for the composite.
# Strings must match the "Indicator name" column of the source CSV exactly;
# the trailing space in "Have a NPOA-IUU " is presumably present in the
# data file as well — verify against the CSV before changing it.
selectedIUU = [
    "Accepted FAO Compliance Agreement",
    "Mandatory vessel tracking for commercial seagoing fleet",
    "Operate a national VMS/FMC centre",
    "Designated ports specified for entry by foreign vessels",
    "Ratification/accession of UNCLOS Convention",
    "Ratification/accession of UNFSA",
    "Have a NPOA-IUU ",
    "Compliance with RFMO flag state obligations",
    "Compliance with RFMO port state obligations",
]
Code
# IUU Fishing Index indicator scores (2019 and 2021 editions).
iuuIndex = pd.read_csv(
    "../data/iuu_fishing_index_2021-data_and_disclaimer/iuu_fishing_index_2019-2021_indicator_scores.csv"
)
# keep only the assessed countries and the selected governance indicators
iuuIndex = iuuIndex[
    (iuuIndex["Country"].isin(countries))
    & (iuuIndex["Indicator name"].isin(selectedIUU))
]

# fill missing scores with the cross-country mean for the same year/indicator
iuuIndex["Score"] = iuuIndex.groupby(
    ["Year", "Indicator name"], sort=False, group_keys=False
)["Score"].apply(lambda x: x.fillna(x.mean()))
# one row per (Country, Year), one column per indicator
iuuIndex = iuuIndex.pivot_table(
    index=["Country", "Year"], columns="Indicator name", values="Score"
)
# equal weights; sigma = 10 allows substitution between indicators
alpha = 1 / len(iuuIndex.columns)
sigma = 10
compositeIUU = ci.compositeDF(alpha, iuuIndex, sigma)
compositeIUU = pd.DataFrame(compositeIUU, columns=["IUU"])
# The target is the best in the IUU Fishing Index = 1 (lower is better),
# so the composite is inverted and scaled to 0-100.
compositeIUU['target'] = 1/compositeIUU.IUU * 100
compositeIUU = compositeIUU.reset_index().pivot_table(
    index="Country", columns="Year", values="target"
)
compositeIUU
missingCountries(compositeIUU)
Year 2019 2021
Country
Belgium 100.000000 64.031078
Denmark 51.245497 65.768667
Estonia 70.952143 70.952143
Finland 53.277658 64.031078
France 54.608684 61.476344
Germany 51.245497 70.952143
Ireland 58.866791 70.952143
Latvia 90.295268 70.952143
Lithuania 51.245497 54.146791
Netherlands 46.306308 51.245497
Poland 72.707739 70.952143
Portugal 47.352555 70.194173
Spain 54.608684 48.688266
Sweden 70.952143 70.952143
United Kingdom 47.352555 41.915876
No missing countries

2. TAC/Catch

Data extracted partially manually.

Source of TAC is stockAssessment[“year”] PDFs.

Source of country catches is /OfficialNominalCatches and stockAssessment[“year”] PDFs.

Code
# Catch relative to TAC (%), per country, from the ICES data compilation.
tacCatch = pd.read_csv("../data/ices_data.csv", index_col=0, header=[0, 1])
tacCatch = 100 * tacCatch[["TAC/Catch"]]
tacCatch = tacCatch.droplevel(0, axis=1)
# the "most recent" assessment column corresponds to 2022
tacCatch = tacCatch.rename(columns={"most recent": 2022})
tacCatch = tacCatch.loc[tacCatch.index.isin(countries)]
tacCatch
missingCountries(tacCatch)
Year 2012 2016 2022
Belgium 78.480130 95.507005 97.255892
Estonia 98.404500 99.435507 96.478689
Finland 98.942568 99.843818 92.122689
France 84.168757 91.279043 97.873527
Germany 97.890177 96.075084 96.254821
Ireland 95.739977 94.352001 97.971417
Latvia 99.389419 99.008097 96.696104
Lithuania 98.155150 98.005229 95.209801
Netherlands 92.986309 95.043900 97.158599
Poland 99.762408 98.797493 94.911954
Portugal 96.065583 97.542063 98.963306
Spain 81.619492 94.504539 98.517713
Sweden 96.705334 98.484897 95.600203
United Kingdom 95.734499 93.625841 98.166726
Denmark 94.385126 96.196738 96.258764
No missing countries

14.7

Sustainable fisheries as a proportion of GDP in small island developing States, least developed countries and all countries

We use two indicators:

  1. ‘Livelihoods & economies’ Index as per Baltic Health Index (BHI)
  2. ‘Tourism’ Index as per BHI

1. ‘Livelihoods & economies’ Index

Divided into:

  1. Economies: Gross Value Added (GVA) annual growth rate of marine sectors against 1.5% target
  2. Livelihoods: Two subindicators: GVA/Hours Worked & FTE Employees/Coastal population

Source Blue Economy

Economies
Code
# Economies indicator, Value added at factor cost - million euro
# https://blue-economy-observatory.ec.europa.eu/blue-economy-indicators_en
gva = pd.read_excel("../data/blueEconomyObservatory/valueAddedFactorCost.xlsx")
gva.rename(
    columns={
        "Value added at factor cost (€ million)": "gva",
        "Member State": "country",
    },
    inplace=True,
)
gva = gva[gva["country"].isin(countriesEU)]
# drop rows with no sector assignment
gva = gva[gva.Sector != "-"]
# total GVA per country x sector x year
gvaSector = gva.groupby(["country", "Sector", "Year"]).sum(numeric_only=True)
# year-on-year growth of each country-sector series (rows are in
# ascending Year order because groupby sorts the index)
gvaSector["growthRate"] = gvaSector.groupby(["country", "Sector"])["gva"].pct_change()
# target of 1.5% annual growth rate; score capped to the 0-100 range
gvaSector["sectorScore"] = (gvaSector["growthRate"] / 0.015 * 100).clip(
    upper=100, lower=0
)
# each sector is weighted by its share of the country's total GVA that year
gvaSector["weight"] = gvaSector["gva"] / gvaSector.groupby(["country", "Year"])[
    "gva"
].transform("sum")

gvaSector["weightedScore"] = gvaSector["sectorScore"] * gvaSector["weight"]
# country-level score = GVA-share-weighted sum of sector scores
gvaEcon = gvaSector.groupby(["country", "Year"]).sum(numeric_only=True)[
    ["gva", "weightedScore"]
]
years = [2012, 2016, gvaEcon.index.get_level_values("Year").max()]
economies = gvaEcon.reset_index().pivot_table(
    index="country", columns="Year", values="weightedScore"
)
economies[economies.index.isin(countries)][years]
missingCountries(economies)
Year 2012 2016 2020
country
Belgium 96.873934 21.658872 21.297335
Denmark 4.504192 22.266304 61.905965
Estonia 52.767271 48.123759 54.687999
Finland 35.661048 73.708100 30.287035
France 32.382788 28.159942 0.247155
Germany 38.803670 64.176248 5.469456
Ireland 87.952005 100.000000 28.923979
Latvia 97.411580 2.196562 19.610579
Lithuania 37.153807 61.513965 42.140579
Netherlands 84.178366 21.389797 40.346288
Poland 62.903198 59.160696 80.944135
Portugal 61.985137 96.467062 9.280987
Spain 3.312837 85.334154 0.000000
Sweden 43.109175 71.468051 14.636507
United Kingdom 100.000000 20.980994 0.000000
No missing countries
Livelihoods
Code
# Livelihoods - Quality
# Hours Worked https://blue-economy-observatory.ec.europa.eu/blue-economy-indicators_en
# Select Insight Advisor, search in Asset for the measure and select Dimensions: year, member state

# hoursWorked = pd.read_excel('../data/blueEconomyObservatory/hoursWorked.xlsx')
# hoursWorked.rename(columns={'Member State':'country_name','Year':'year','Hours worked by employees':'hoursWorked'}, inplace=True)
# gvaHoursW = hoursWorked.merge(gvaEcon.reset_index(), on=['country_name','year'], how='left')
# gvaHoursW['gvaHoursW'] = gvaHoursW['value'] / gvaHoursW['hoursWorked'] * 1_000_000
# gvaHoursW = gvaHoursW[['country_name','year','gvaHoursW']].set_index(['country_name','year'])
# gvaHoursW.pivot_table(index='country_name', columns='year', values='gvaHoursW')

# Above, I calculated the GVA per hour worked ratio using the raw data of each variable.
# The result is different from the ratio variable provided by the Blue Economy Observatory.
# I emailed them to ask for clarification.

# Pre-computed GVA per hour worked, indexed by (Year, Member State).
gvaHoursW = pd.read_excel(
    "../data/blueEconomyObservatory/GVAperHourWorked.xlsx"
).set_index(["Year", "Member State"])
gvaHoursW.rename(
    columns={"Gross value added per hour worked by employees": "gvaHoursW"},
    inplace=True,
)
# non-numeric entries (e.g. placeholders) become NaN
gvaHoursW["gvaHoursW"] = gvaHoursW["gvaHoursW"].apply(pd.to_numeric, errors="coerce")
# Comparative price levels https://ec.europa.eu/eurostat/databrowser/view/TEC00120/default/table?lang=en
pppEurostat = pd.read_csv("../data/pppEurostat.csv")
pppEurostat.geo = pppEurostat.geo.replace(abbrev_to_country)
pppEurostat = (
    pppEurostat[pppEurostat.geo.isin(countriesEU)]
    .rename(columns={"geo": "Member State", "TIME_PERIOD": "Year", "OBS_VALUE": "ppp"})[
        ["ppp", "Member State", "Year"]
    ]
    .set_index(["Member State", "Year"])
)
# NOTE(review): the two MultiIndexes have their levels in opposite order
# ((Year, Member State) vs (Member State, Year)); this merge relies on
# pandas aligning the levels by name — verify against the output.
gvaHoursW = gvaHoursW.merge(pppEurostat, left_index=True, right_index=True)
# adjust for purchasing-power differences (comparative price level index)
gvaHoursW["gvaHoursWppp"] = gvaHoursW["gvaHoursW"] / gvaHoursW["ppp"] * 100
gvaHoursW = gvaHoursW[gvaHoursW.index.get_level_values("Year").isin(range(2012, 2023))].reset_index()

# Denmark and France are missing.
# Fill gaps with the cross-country mean of the same year.
gvaHoursW["gvaHoursWppp"] = gvaHoursW.groupby("Year")["gvaHoursWppp"].transform(
    lambda x: x.fillna(x.mean())
)

# We set target using top 3 countries. The more gva/hours worked the better.
target = max(calculate_target(gvaHoursW, 'gvaHoursWppp', 'Member State'))
gvaHoursW['target'] = (gvaHoursW.gvaHoursWppp/target * 100).replace([np.inf, -np.inf], 100).clip(0,100)
Code
# Livelihoods - Quantity
# FTE employees https://blue-economy-observatory.ec.europa.eu/blue-economy-indicators_en

fteEmployment = pd.read_excel("../data/blueEconomyObservatory/employmentFTE.xlsx")
fteEmployment.rename(
    columns={
        "Member State": "country_name",
        "Year": "year",
        "Employees in full time equivalent units": "fteEmployees",
    },
    inplace=True,
)
# Coastal areas https://ec.europa.eu/eurostat/web/coastal-island-outermost-regions/methodology
coastArea = pd.read_excel("../data/NUTS2021Coastal.xlsx", sheet_name="Coastal regions")[
    ["NUTS_ID", "COASTAL CATEGORY"]
]
coastArea.rename(
    columns={"NUTS_ID": "geo", "COASTAL CATEGORY": "coastal"}, inplace=True
)
# Population https://ec.europa.eu/eurostat/databrowser/view/DEMO_R_PJANAGGR3__custom_7060174/default/table?lang=en
population = pd.read_csv("../data/demoNUTS3.csv")

coastalPop = coastArea.merge(population, on="geo", how="left")
# Country code = first two letters of the NUTS id.
# France and UK have three letter abbreviation codes
# NOTE(review): this assumes every 2-letter prefix is a key of
# abbrev_to_country (e.g. "UK" for the United Kingdom) — verify the mapping.
coastalPop["country"] = coastalPop.geo.str.extract(r"([a-zA-Z]{2})").replace(
    abbrev_to_country
)
coastalPop = coastalPop[coastalPop.country.isin(countriesEU)]
coastalPop = coastalPop.dropna(subset=["TIME_PERIOD"])
coastalPop["TIME_PERIOD"] = coastalPop["TIME_PERIOD"].astype(int)
# aggregate NUTS-level population to country x year x coastal category
coastalPop = (
    coastalPop.groupby(
        [
            "country",
            "TIME_PERIOD",
            "coastal",
        ]
    )
    .sum(numeric_only=True)
    .reset_index()
)
# 0 non-coastal, 1 coastal (on coast), 2 coastal (>= 50% of population living within 50km of the coastline)
coastalPop = (
    coastalPop[coastalPop["coastal"].isin([1, 2])]
    .groupby(["country", "TIME_PERIOD"])
    .sum()
)
# blue-economy FTE employees per head of coastal population
fteCoastalPop = coastalPop.merge(
    fteEmployment,
    left_on=["country", "TIME_PERIOD"],
    right_on=["country_name", "year"],
    how="inner",
)
fteCoastalPop["fteCoastalPop"] = (
    fteCoastalPop["fteEmployees"] / fteCoastalPop["OBS_VALUE"]
)
# UK missing 2020
# Re-index on the full year x country grid so gaps become explicit NaN rows...
fteCoastalPop = fteCoastalPop["year"].drop_duplicates().to_frame().merge(fteCoastalPop["country_name"].drop_duplicates(), how="cross").merge(fteCoastalPop, how="left")
# ...then forward-fill each country's series (rows are in year order).
# GroupBy.ffill() replaces the deprecated fillna(method='ffill'),
# which was removed in recent pandas versions.
fteCoastalPop.fteCoastalPop = fteCoastalPop.fteCoastalPop.groupby(fteCoastalPop.country_name).ffill()
# We set target using top 3 countries. The more FTE employees/Coastal population the better
target = max(calculate_target(fteCoastalPop, 'fteCoastalPop', 'country_name'))
fteCoastalPop['target'] = (fteCoastalPop.fteCoastalPop/target * 100).replace([np.inf, -np.inf], 100).clip(0,100)
Code
# Livelihoods - Quantitative + Qualitative

# Combine the two livelihood scores on (Member State, Year). After the
# outer merge the suffixes mean: target_x = GVA-per-hour-worked score
# (quality, from gvaHoursW), target_y = FTE-employees-per-coastal-
# inhabitant score (quantity, from fteCoastalPop).
livelihoods = (
    
        gvaHoursW
        .merge(
            fteCoastalPop[["target", "country_name", "year"]],
            left_on=["Member State", "Year"],
            right_on=["country_name", "year"],
            how="outer",
        )
        .set_index(["Member State", "Year"])[
        ["target_x", "target_y"]
    ]
)
# equal weights; sigma = 10 allows substitution between the two scores
sigma = 10
alpha = 1 / len(livelihoods.columns)
livelihoodsAgg = pd.DataFrame(
    ci.compositeDF(alpha=alpha, sigma=sigma, df=livelihoods),
    columns=["livelihoodsScore"],
).reset_index()
years = [2012,2016,livelihoodsAgg.Year.max()]
livelihoodsAgg = livelihoodsAgg.pivot_table(
    index="Member State", columns="Year", values="livelihoodsScore"
)

livelihoodsAgg[years][livelihoodsAgg.index.isin(countries)]
missingCountries(livelihoodsAgg)
Year 2012 2016 2020
Member State
Belgium 51.802011 51.299815 51.386254
Denmark 32.749801 31.486793 33.027110
Estonia 46.656028 42.047024 36.055520
Finland 29.925018 29.812339 27.788266
France 32.757447 30.114250 31.001004
Germany 56.853249 64.799416 70.483043
Ireland 21.156178 23.834970 22.376832
Latvia 17.910066 20.625229 22.111941
Lithuania 43.769817 51.147603 60.464272
Netherlands 59.048523 54.556125 44.578926
Poland 33.954238 34.399858 39.983381
Portugal 20.076299 22.754069 22.863547
Spain 32.277416 31.543588 30.354344
Sweden 29.692513 33.018899 34.687773
United Kingdom 52.884828 46.003809 52.837006
No missing countries

2. Tourism: GVA/nights spent

Code
# Tourism: GVA/nights spent
# Nights spent at tourist accommodation establishments in coastal areas
# https://ec.europa.eu/eurostat/databrowser/view/TOUR_OCC_NIN2DC__custom_7065857/default/table?lang=en

nightCoast = pd.read_csv("../data/nightsAccomoCoastal.csv")
# map Eurostat two-letter codes to full country names
nightCoast.geo = nightCoast.geo.replace(abbrev_to_country)
nightCoast = nightCoast[nightCoast.geo.isin(countriesEU)]
# GVA of the Accommodation sub-sector only (from the gva frame above)
gvaAccomodation = gva[gva["Sub-sector"] == "Accommodation"]
gvaNights = gvaAccomodation.reset_index().merge(
    nightCoast,
    left_on=["country", "Year"],
    right_on=["geo", "TIME_PERIOD"],
    how="inner",
)
# euros of accommodation GVA per night spent (gva is in € million)
gvaNights["gvaNights"] = gvaNights["gva"] * 1_000_000 / gvaNights["OBS_VALUE"]
# We set target using top 3 countries. The more GVA/nights spent the better
target = max(calculate_target(gvaNights, 'gvaNights', 'country'))
gvaNights['target'] = (gvaNights.gvaNights/target * 100).replace([np.inf, -np.inf], 100).clip(0,100)
years = [2012,2016,gvaNights.Year.max()]
gvaNights = gvaNights.pivot_table(
    index="country", columns="Year", values="target"
)
# fill missing values with backfill and forward fill
gvaNights = (
    gvaNights
    .bfill(axis=1)
    .ffill(axis=1)
)
gvaNights[years][gvaNights.index.isin(countries)]
missingCountries(gvaNights)
Year 2012 2016 2020
country
Belgium 45.334240 38.058850 41.541088
Denmark 81.889205 93.267237 100.000000
Estonia 58.620529 58.491705 75.930535
Finland 65.419935 79.994586 55.925323
France 47.093545 47.012451 42.348381
Germany 70.673069 71.550445 85.368497
Ireland 59.059376 85.079465 100.000000
Latvia 38.809039 53.736533 47.795775
Lithuania 33.817135 52.212723 48.222261
Netherlands 39.665778 40.625908 31.666511
Poland 60.057201 41.072395 27.725916
Portugal 53.680933 57.461233 67.520145
Spain 53.868019 56.316363 44.842960
Sweden 62.006181 75.725888 74.427793
United Kingdom 59.320101 39.530034 39.530034
No missing countries

14.a

Proportion of total research budget allocated to research in the field of marine technology

We use two indicators:

  1. Official UNSD indicator ER_RDE_OSEX: Max-Min transformation where the max-value and min-value are the maximum and minimum in the assessment period (2009-2017) of all European countries (not restricted to the countries in the analysis).
  2. SAD/TAC: Catch-weighted TAC relative to Scientific Advice

Note: ER_RDE_OSEX data comes from Global Ocean Science Report (GOSR) 2020, and goes from 2013 to 2017. Data for 2009-2012 data is available in the UNstats archive (download csv for 29-Mar-19)

1. Ocean science expenditure ER_RDE_OSEX

Several sources

Code
# %%time
# National ocean science expenditure as a share of total research and development funding (%), UNstats archive (years 2009-2012)
# https://unstats.un.org/sdgs/indicators/database/archive

# # read old data by chunks to speed loading and save 14.a.1 to separate file
# iter_csv = pd.read_csv('./data/AllData_Onward_20190329.csv', iterator=True, chunksize=1000, encoding='cp1252')
# oResearchOld = pd.concat([chunk[chunk.Indicator == '14.a.1'] for chunk in iter_csv])
# oResearchOld.to_csv('./data/archive14a1.csv', index=False)

# Archived 14.a.1 series (extracted once by the commented-out chunked read
# above), reshaped to one row per country and one column per year.
oResearchOld = pd.read_csv("../data/archive14a1.csv").pivot(
    index="GeoAreaName", columns="TimePeriod", values="Value"
)
Code
# National ocean science expenditure as a share of total research and development funding (%), UNstats
# https://unstats.un.org/sdgs/dataportal/database
# NOTE(review): oResearch is recomputed from scratch by the next cell
# (the target-based variant), which is the version used downstream;
# this cell only displays the max-min-transformed variant.

# read official data and merge with archive data
oResearch = sdg14[sdg14["SeriesCode"] == "ER_RDE_OSEX"].pivot_table(
    columns="TimePeriod", index="GeoAreaName", values="Value", aggfunc="mean"
)
# outer merge gives overlapping year columns _x (archive) / _y (official)
oResearch = oResearchOld.merge(
    oResearch, left_index=True, right_index=True, how="outer"
)
# use all countries in Europe
oResearch = oResearch[oResearch.index.isin(countriesEU)].dropna(how="all", axis=1)
# fill nan of year 2013 from new report with old report and 2016 with 2017
oResearch[2013] = oResearch["2013_y"].fillna(oResearch["2013_x"])
# oResearch[2016] = oResearch[2016].fillna(oResearch[2017])
oResearch = oResearch[list(range(2013, 2018))]
# weighted by EEZ area
oResearch = oResearch.merge(eez, left_index=True, right_index=True, how="outer")
for col in oResearch.drop("Area_km2", axis=1).columns:
    oResearch[col] = (
        oResearch[col] * oResearch["Area_km2"] / (oResearch["Area_km2"].sum())
    )
# maxMin transformation as (x - min)/(max-min) * 100 --> converts to 0-100 scale with 0=worst, 100=best
# (the min and max are taken over the whole 2013-2017 panel, not per year)
oResearch = (
    (oResearch.loc[:, 2013:2017] - oResearch.loc[:, 2013:2017].min().min())
    / (
        oResearch.loc[:, 2013:2017].max().max()
        - oResearch.loc[:, 2013:2017].min().min()
    )
    * 100
).round(2)
# fill nan with mean of column
for col in oResearch.columns:
    oResearch[col] = oResearch[col].fillna(oResearch[col].mean())

oResearch = oResearch[oResearch.index.isin(countries)]
oResearch[[2013, 2016, 2017]]

missingCountries(oResearch)
2013 2016 2017
Belgium 0.01000 0.00 0.000000
Bulgaria 3.19375 14.67 8.182222
Croatia 3.19375 14.67 8.182222
Denmark 3.19375 14.67 8.182222
Estonia 3.19375 14.67 8.182222
Finland 0.57000 0.58 0.570000
France 6.30000 14.67 4.200000
Germany 0.40000 0.31 0.300000
Ireland 3.19375 14.67 47.360000
Italy 3.19375 14.67 8.182222
Latvia 3.19375 14.67 8.182222
Lithuania 3.19375 14.67 8.182222
Netherlands 0.35000 0.29 0.330000
Norway 3.19375 14.67 8.182222
Poland 0.10000 0.06 0.060000
Portugal 3.19375 95.47 8.182222
Romania 3.19375 14.67 8.182222
Spain 6.58000 11.48 10.500000
Sweden 3.19375 14.67 8.182222
United Kingdom 11.24000 9.17 10.320000
No missing countries
Code
# National ocean science expenditure as a share of total research and development funding (%), UNstats
# https://unstats.un.org/sdgs/dataportal/database

# read official data and merge with archive data
# (this cell overwrites the max-min variant of oResearch produced above;
# it scores against a top-3 target instead)
oResearch = sdg14[sdg14["SeriesCode"] == "ER_RDE_OSEX"].pivot_table(
    columns="TimePeriod", index="GeoAreaName", values="Value", aggfunc="mean"
)
# outer merge gives overlapping year columns _x (archive) / _y (official)
oResearch = oResearchOld.merge(
    oResearch, left_index=True, right_index=True, how="outer"
)
# use all countries in Europe
oResearch = oResearch[oResearch.index.isin(countriesEU)].dropna(how="all", axis=1)
# fill nan of year 2013 from new report
oResearch[2013] = oResearch["2013_y"].fillna(oResearch["2013_x"])
oResearch = oResearch[list(range(2013, 2018))]
# weighted by EEZ area
oResearch = oResearch.merge(eez, left_index=True, right_index=True, how="outer")
for col in oResearch.drop("Area_km2", axis=1).columns:
    oResearch[col] = (
        oResearch[col] * oResearch["Area_km2"] / (oResearch["Area_km2"].sum())
    )

# report the countries whose every year is NaN — their scores below are
# entirely imputed from the column means
print('Conclusions about the following countries shouldn\'t be drawn because they are missing all data:',
      oResearch[(oResearch.index.isin(countries)) & (oResearch.drop("Area_km2", axis=1).isna().all(axis=1))].index.to_list()
)
# fill nan with mean of column
for col in oResearch.columns:
    oResearch[col] = oResearch[col].fillna(oResearch[col].mean())
# long format: one row per (country, Year)
oResearch = oResearch.drop("Area_km2", axis=1).stack().reset_index().rename(columns={0: "oResearch", "level_1": "Year", "level_0": "country"})

# We set target with top 3 countries. The more ocean science expenditure/total R&D funding, the better.
target = max(calculate_target(oResearch, 'oResearch', 'country'))
oResearch['target'] = (oResearch.oResearch/target * 100).replace([np.inf, -np.inf], 100).clip(0,100)
years = [2013,2016,oResearch.Year.max()]
oResearch = oResearch.pivot_table(
    index="country", columns="Year", values="target"
)

oResearch[years][oResearch.index.isin(countries)]
missingCountries(oResearch)
Conclusions about the following countries shouldn't be drawn because they are missing all data: ['Denmark', 'Estonia', 'Latvia', 'Lithuania', 'Sweden']
No missing countries
Year 2013 2016 2017
country
Belgium 0.192214 0.144283 0.136875
Denmark 20.521594 93.775413 52.356158
Estonia 20.521594 93.775413 52.356158
Finland 3.786670 3.841204 3.801324
France 40.333788 93.775413 26.927500
Germany 2.689630 2.098363 2.051878
Ireland 20.521594 93.775413 100.000000
Latvia 20.521594 93.775413 52.356158
Lithuania 20.521594 93.775413 52.356158
Netherlands 2.362859 2.010297 2.226894
Poland 0.760625 0.528163 0.509660
Portugal 20.521594 100.000000 52.356158
Spain 42.166447 73.435813 67.147209
Sweden 20.521594 93.775413 52.356158
United Kingdom 71.880520 58.640165 65.978803

2. SAD/TAC

Data extracted partially manually.

Source of TAC and SAD is stockAssessment[“year”] PDFs.

Source of country catches is /OfficialNominalCatches and stockAssessment[“year”] PDFs.

If a country fishes only on fish stocks where assignment of TAC follows scientific advice, it would score 100

Code
# Catch-weighted TAC relative to Scientific Advice (%), per country.
# A country whose TACs all follow scientific advice scores 100.
sadTac = pd.read_csv("../data/ices_data.csv", index_col=0, header=[0, 1])
sadTac = sadTac[["SAD/TAC"]] * 100
sadTac = sadTac.droplevel(0, axis=1)
# most recent assessment is 2022; use the integer label for consistency
# with the tacCatch cell above (it maps "most recent" to the int 2022)
sadTac.rename(columns={"most recent": 2022}, inplace=True)
sadTac = sadTac[sadTac.index.isin(countries)]
sadTac
missingCountries(sadTac)
Year 2012 2016 2022
Belgium 98.292746 97.071052 96.615720
Estonia 92.753209 86.254153 90.900148
Finland 99.440656 97.749144 95.619503
France 90.323954 91.459570 92.379607
Germany 90.655052 85.395915 87.737917
Ireland 98.126385 87.353687 75.753058
Latvia 91.715604 82.541315 91.324412
Lithuania 96.064711 84.378674 88.919591
Netherlands 95.566376 88.761011 89.214834
Poland 94.916995 80.769729 86.833090
Portugal 98.862330 94.596274 91.951678
Spain 91.262037 84.589603 88.343857
Sweden 90.155366 89.159207 87.337151
United Kingdom 79.451505 81.326929 82.954885
Denmark 84.660093 77.694543 84.036427
No missing countries

14.b

Degree of application of a legal/regulatory/policy/institutional framework which recognizes and protects access rights for small‐scale fisheries

We use two indicators:

  1. OHI Artisanal Fishing Opportunities Index: No further transformation
  2. Percentage of Fish Species Threatened: No further transformation

1. OHI ‘Artisanal opportunities’ Index

Source

Code
# OHI 'Artisanal opportunities' Index
# https://oceanhealthindex.org/global-scores/data-download/

# Keep the 'status' dimension of the goal for the assessed countries and
# reshape to one row per country, one column per scenario year.
ohiArt = pd.read_csv("../data/scoresOHI.csv")
keep = (
    ohiArt["region_name"].isin(countries)
    & (ohiArt["long_goal"] == "Artisanal opportunities")
    & (ohiArt["dimension"] == "status")
)
ohiArt = ohiArt.loc[keep].pivot_table(
    columns="scenario", index="region_name", values="value", aggfunc="mean"
)

ohiArt[[2012, 2018, 2022]]

missingCountries(ohiArt)
scenario 2012 2018 2022
region_name
Belgium 79.51 77.65 78.66
Denmark 70.50 77.24 74.63
Estonia 89.39 91.18 99.24
Finland 84.67 82.73 77.45
France 77.09 76.44 78.61
Germany 73.21 76.62 73.23
Ireland 69.90 73.46 73.52
Latvia 87.40 89.29 99.01
Lithuania 88.71 90.62 97.63
Netherlands 55.01 57.44 60.35
Poland 87.82 76.21 77.61
Portugal 77.81 65.46 66.08
Spain 73.45 70.66 73.08
Sweden 94.77 95.73 97.37
United Kingdom 81.35 82.74 79.79
No missing countries

2. Percentage of Fish Species Threatened

Source Analysis done in iucnRedList.ipynb

Code
# Percentage of Fish Species Threatened
# Time series comparison is tricky: https://www.iucnredlist.org/assessment/red-list-index

# One row per country, one column per assessment year.
threatenedFish = (
    pd.read_csv("../data/threatenedFishIUCN.csv")
    .pivot_table(index="name", columns="year", values="threatenedScore")
)
threatenedFish

missingCountries(threatenedFish)
year 2012 2016 2022
name
Belgium 77.777778 87.786260 84.967320
Denmark 83.516484 89.944134 86.138614
Estonia 80.952381 89.473684 90.243902
Finland 81.818182 89.189189 90.000000
France 88.135593 93.216080 91.332611
Germany 77.611940 87.022901 82.894737
Ireland 86.592179 93.750000 91.885965
Latvia 77.272727 87.804878 88.636364
Lithuania 77.272727 87.500000 88.372093
Netherlands 83.132530 91.489362 89.047619
Poland 76.923077 87.234043 86.274510
Portugal 89.573460 94.731065 92.578125
Spain 89.938398 94.646465 92.513863
Sweden 77.272727 87.121212 84.027778
United Kingdom 86.729858 93.827160 91.337100
No missing countries

14.c

Number of countries making progress in ratifying, accepting and implementing through legal, policy and institutional frameworks, ocean-related instruments that implement international law, as reflected in the United Nations Convention on the Law of the Sea

We use two indicators:

  1. Participation in agreements of the International Maritime Organization (IMO Participation Rate)
  2. Mining Area/EEZ (Deep Sea Mining)

1. Participation in agreements of the International Maritime Organization

Source

Code
# dictionary for country codes used by the GISIS:
# the CSV holds (code, country) rows; build the country -> code map directly
with open("../data/gisisDict.csv") as csv_file:
    gisisDict = {name: code for code, name in csv.reader(csv_file)}
Code
# Participation in agreements of the International Maritime Organization (IMO)

# https://www.imo.org/en/About/Conventions/Pages/StatusOfConventions.aspx Excel file with current status of IMO conventions
# Historical data comes from the GISIS database: https://gisis.imo.org/Public/ST/Ratification.aspx
# (an account is needed to access the data).

# Scraping failed (Selenium/bs4 errors), so the HTML pages were saved
# manually; the URLs below point to each country's ratification page.

gisisCountries = {name: code for name, code in gisisDict.items() if name in countries}
listIMO = [
    "https://gisis.imo.org/Public/ST/Ratification.aspx?cid=" + str(code)
    for code in gisisCountries.values()
]

# for i in listIMO:
#     webbrowser.open(i)
Code
# Per-country count of IMO treaties in force in 2012, 2016 and 2021,
# parsed from the manually saved GISIS ratification pages.
imoRatedf = pd.DataFrame(columns=["Country", 2012, 2016, 2021])

# loop thru the html files in the folder and extract the data
for i in range(len(os.listdir("../data/treatiesIMO/"))):
    countryIMO = np.nan
    # table 4 of each saved page holds the ratification status
    imoRate = pd.read_html(
        "../data/treatiesIMO/Status of Treaties » Ratification of Treaties{}.html".format(
            i
        )
    )[4]
    # get the country name from the html file name by checking if string is in the list of countries
    for country in countries:
        if country in imoRate["Treaty"][0]:
            countryIMO = country
    # identity check against the same np.nan object assigned above;
    # works, but pd.isna(countryIMO) would be clearer
    if countryIMO is not np.nan:
        # row 1 holds the real header; rows 0-1 are dropped
        imoRate.columns = imoRate.iloc[1]
        imoRate = imoRate[2:]
        # new columns with the year of accession and denounced:
        # accession = text before the first "(", denounced = text after
        # ":" inside the trailing parentheses
        imoRate["accession"] = (
            imoRate["Date of entry into force in country"]
            .str.extract("^([^(]+)")
            .fillna("")
        )
        imoRate["denounced"] = imoRate[
            "Date of entry into force in country"
        ].str.extract(".*\\:(.*)\\).*")
        imoRate[["accession", "denounced"]] = (
            imoRate[["accession", "denounced"]]
            .apply(pd.DatetimeIndex)
            .apply(lambda x: x.dt.year)
        )
        # count the number of treaties each country accessioned and didn't denounced by 2012, 2016 and 2021
        # NOTE(review): this inner loop reuses the outer loop variable i
        # (harmless in Python, since the outer for rebinds i each
        # iteration, but fragile); the strict < / > comparisons also mean
        # treaties entering force or denounced exactly in a cut-off year
        # are not counted as in force for that year — confirm intended.
        for i in (2012, 2016, 2021):
            imoRate[str(i)] = np.where(
                (imoRate.accession < i)
                & ((imoRate.denounced > i) | (imoRate.denounced.isna())),
                1,
                0,
            )
        imoCount = (
            countryIMO,
            imoRate["2012"].sum(),
            imoRate["2016"].sum(),
            imoRate["2021"].sum(),
        )
        # append as a new row
        imoRatedf.loc[len(imoRatedf), imoRatedf.columns] = imoCount
# calculate total possible treaties and convert counts to a percentage of it
# NOTE(review): targetIMO is computed from the imoRate left over from the
# LAST file processed above — assumes every page lists the same treaties.
targetIMO = len(imoRate.dropna(subset=["Date of entry into force in country"]))
imoRatedf = imoRatedf.set_index("Country").sort_index()
imoRatedf = 100 * imoRatedf / targetIMO
# cap at 100
imoRatedf[imoRatedf>100] = 100
imoRatedf = imoRatedf.astype(np.float64)
# (redundant with the astype above, kept as-is)
imoRatedf = imoRatedf.apply(pd.to_numeric)
imoRatedf

missingCountries(imoRatedf)
2012 2016 2021
Country
Belgium 78.431373 76.470588 90.196078
Denmark 80.392157 86.274510 92.156863
Estonia 76.470588 78.431373 82.352941
Finland 76.470588 78.431373 90.196078
France 84.313725 84.313725 96.078431
Germany 80.392157 82.352941 88.235294
Ireland 66.666667 68.627451 68.627451
Latvia 80.392157 80.392157 82.352941
Lithuania 58.823529 62.745098 64.705882
Netherlands 84.313725 86.274510 92.156863
Poland 80.392157 84.313725 86.274510
Portugal 68.627451 76.470588 86.274510
Spain 86.274510 86.274510 88.235294
Sweden 82.352941 90.196078 94.117647
United Kingdom 78.431373 78.431373 78.431373
No missing countries

2. Mining Area/EEZ

Source

Code
# Deep Sea Mining, Mining Area/EEZ — single 2023 assessment

seaMining = (
    pd.read_excel("../data/deepSeaMining.xlsx", sheet_name="Data")
    .assign(Year=2023)
    .rename(columns={"Score 2023": "2023"})
    .set_index("Country")[["2023"]]
)
seaMining
missingCountries(seaMining)
2023
Country
Belgium 0
Germany 100
Denmark 100
Estonia 100
Spain 100
Finland 100
France 100
Ireland 100
Lithuania 100
Latvia 100
Netherlands 100
Poland 97
Portugal 100
Sweden 100
United Kingdom 99
No missing countries

Indicators aggregation

Given our ratio-scale, fully comparable indicators $I_{it}$, a meaningful aggregation of $N$ indicators into a composite indicator $CI_t$ is obtained, according to social choice theory, by applying a generalized mean:

$$CI_t(\alpha_{it}, I_{it}, \sigma) = \left(\sum^N_{i=1}\alpha_{it} I^{\frac{\sigma-1}{\sigma}}_{it}\right)^{\frac{\sigma}{\sigma-1}} \quad \text{for} \quad t = 2012, 2018, 2021 \text{ (or most recent)}$$

with weights $\alpha_{it} > 0$ and $\sum^N_{i=1}\alpha_{it} = 1$. The parameter $\sigma$ quantifies the elasticity of substitution between the different indicators. High (low) values of $\sigma$ imply good (poor) substitution possibilities, which means that a high score in one indicator can (cannot) compensate for a low score in another indicator. Consequently, high and low values of $\sigma$ correspond to concepts of weak and strong sustainability, respectively. Depending on $\sigma$, one can obtain a full class of specific functional forms for the composite indicator.

We define:

$\sigma_{Target} = 0.5$ (strong sustainability) and $\sigma_{Target} = 10$ (weak sustainability)

Code
# Country x Year score tables for every indicator, in the same order as
# varNames below. FIX(review): the published version listed fseRisk (the
# raw grouped subsidy totals, whose (Country, Year)-indexed frame cannot
# be stacked into the Country x Year long format used here); the scored
# table fseRiskkRatio is the one shaped like the other entries.
varDf = [
    nitro,
    eutro,
    wasteG,
    wasteR,
    mspVal,
    ohiBio,
    co2,
    phCopernicus,
    fmsyF,
    bBmsy,
    kbaMPA,
    natura2kEEZ,
    fseRiskkRatio,
    tacCatch,
    compositeIUU,
    livelihoodsAgg,
    economies,
    gvaNights,
    oResearch,
    sadTac,
    ohiArt,
    threatenedFish,
    imoRatedf,
    seaMining,
]
varNames = [
    "nitro",
    "eutro",
    "wasteG",
    "wasteR",
    "mspVal",
    "ohiBio",
    "co2",
    "phCopernicus",
    "fmsyF",
    "bBmsy",
    "kbaMPA",
    "natura2kEEZ",
    "fseRiskkRatio",
    "tacCatch",
    "compositeIUU",
    "livelihoodsAgg",
    "economies",
    "gvaNights",
    "oResearch",
    "sadTac",
    "ohiArt",
    "threatenedFish",
    "imoRatedf",
    "seaMining",
]

dictIndicators = dict(zip(varNames, varDf))
# stack variables in each dataframe: wide (Country x Year) -> long,
# one column per indicator, indexed by (Country, Year)
for name, df in dictIndicators.items():
    df = df.stack().to_frame().rename(columns={0: str(name)})
    df.index.names = ["Country", "Year"]
    df.reset_index(inplace=True)
    df.Year = df.Year.astype(int)
    df.set_index(["Country", "Year"], inplace=True)
    dictIndicators[name] = df
# merge all variables into one dataframe, forward and back fill by country
indicators = pd.concat(dictIndicators.values(), axis=1, join="outer")
indicators = indicators.reset_index().sort_values(["Country", "Year"])
indicators = indicators[indicators.Year.isin(list(range(2012, 2022)))]
indicators = indicators.groupby(["Country"], group_keys=False).apply(
    lambda x: x.ffill().bfill()
)
indicators = indicators.set_index(["Country", "Year"])
# Which indicator columns feed each SDG 14 target. "plastic" is itself a
# composite, computed first (dict order) and reused by 14.1.
# FIX(review): 14.3 referenced a non-existent "co2pc" column (varNames
# defines "co2"), so its composite silently hit the KeyError branch;
# 14.6 updated to the corrected fseRiskkRatio name.
indicatorsL = {
    "plastic": ["wasteG", "wasteR"],
    "14.1": ["nitro", "eutro", "plastic"],
    "14.2": ["mspVal", "ohiBio"],
    "14.3": ["co2", "phCopernicus"],  # dropped "scoreESD"
    "14.4": ["fmsyF", "bBmsy"],
    "14.5": ["kbaMPA", "natura2kEEZ"],
    "14.6": ["fseRiskkRatio", "tacCatch", "compositeIUU"],
    "14.7": ["livelihoodsAgg", "economies", "gvaNights"],
    "14.a": ["oResearch", "sadTac"],
    "14.b": ["ohiArt", "threatenedFish"],
    "14.c": ["imoRatedf", "seaMining"],
}

# calculate composite indicators for each target importing the function from composite.py
targets = indicators.copy()
for target, indicator in indicatorsL.items():
    try:
        # equal weights; sigma = 10 allows substitution between indicators
        alpha = 1 / len(indicator)
        df = targets[indicator]
        sigma = 10
        targets[target] = ci.compositeDF(alpha, df, sigma)
    except KeyError:
        print("Missing indicator for", target)
# keep only the per-target composites ("14.x" columns)
targets = targets[[i for i in targets if i.startswith("14")]]

Monte Carlo simulation

Code source is in the composite.py file

Code
%%time
# calculate composite score for each target importing the function from composite.py
# Monte Carlo (10,000 simulations) for strong sustainability;
# a single run with equal weights and sigma = 10 for weak sustainability
scoresStrong = ci.compositeMC(df = targets, years=[2012, 2016, 2021], simulations=10_000)
scoresWeak = pd.DataFrame(ci.compositeDF(alpha = 1 / len(targets.columns), df = targets, sigma = 10), columns=['scoreWeak'])
# keep only the assessed countries
scoresStrong = scoresStrong[scoresStrong.index.get_level_values('Country').isin(countries)]
CPU times: user 13.8 s, sys: 7.7 ms, total: 13.8 s
Wall time: 13.8 s

Plots

Code
InteractiveShell.ast_node_interactivity = "last_expr"  # last_expr
Code
# merge all data and calculate EEZ-weighted average for indicators and targets

data_frames = [indicators, targets, scoresStrong, scoresWeak]
# inner-join everything on the (Country, Year) index
fullData = reduce(
    lambda left, right: pd.merge(
        left, right, left_index=True, right_index=True, how="inner"
    ),
    data_frames,
)

# long format: one row per (Country, Year, indicator) value, joined with
# each country's EEZ area
eezAverage = (
    pd.DataFrame(fullData.stack().reset_index())
    .merge(eez, left_on="Country", right_on="Country", how="left")
    .rename(columns={0: "value", "level_2": "indicator"})
)

# EEZ-area-weighted mean over countries:
# sum(value * area) / sum(area) per (Year, indicator)
eezAverage["valueEEZ"] = eezAverage.value * eezAverage.Area_km2
eezAverage = (
    eezAverage.groupby(["Year", "indicator"])
    .sum(numeric_only=True)
    .drop("value", axis=1)
)
eezAverage["averageEEZ"] = eezAverage.valueEEZ / eezAverage.Area_km2
Code
# define function to plot boxplots


def plotBox(
    df,
    xaxis_title="",
    yaxis_title="",
    xlegend=0.0,
    ylegend=0.0,
    maxShift=0.0,
    minShift=0.0,
):
    """Boxplot per indicator, annotated with extreme countries and EEZ averages.

    Parameters
    ----------
    df : long-format DataFrame with columns "indicator", "value", "Country".
    xaxis_title, yaxis_title : axis labels.
    xlegend, ylegend : legend position (paper coordinates, 0-1).
    maxShift, minShift : vertical pixel offset for the max/min annotations.

    Side effects: reads the module-level ``eezAverage`` table (2021 values)
    and shows the figure. Originally the defaults were the *type objects*
    ``str`` / ``float``, which would break any call relying on defaults;
    they are now sensible neutral values (all known callers pass explicit
    arguments, so behavior is unchanged for them).
    """
    fig = px.box(df, x="indicator", y="value")

    def _extremeAnnotations(pick, yshift):
        # Build one annotation per indicator at its extreme value (pick is
        # np.max or np.min). If more than 4 countries share the extreme,
        # label with "*" and stash the full list in the returned dict.
        annotations = []
        overflow = {}
        for x, indicator in enumerate(df.indicator.unique()):
            subset = df[df["indicator"] == indicator]
            extremeVal = pick(subset["value"])
            labels = subset.loc[subset["value"] == extremeVal, "Country"].values
            if len(labels) > 4:
                overflow[indicator] = labels
                labels = "*"
            annotations.append(
                dict(
                    x=x,
                    y=extremeVal,
                    text=",<br>".join(labels),
                    yshift=yshift,
                    showarrow=False,
                )
            )
        return annotations, overflow

    # max annotations first, then min (same order as the original two loops)
    maxAnnotations, maxCountriesD = _extremeAnnotations(np.max, maxShift)
    minAnnotations, minCountriesD = _extremeAnnotations(np.min, minShift)
    annoList = maxAnnotations + minAnnotations

    # add EEZ-weighted average markers for 2021
    indicatorAverage = eezAverage.loc[(2021, df.indicator.unique()), :].reset_index()
    for s in indicatorAverage.indicator.unique():
        # .iloc[0]: calling float() directly on a Series is deprecated in pandas
        averageVal = float(
            indicatorAverage.loc[
                indicatorAverage["indicator"] == s, "averageEEZ"
            ].iloc[0]
        )
        fig.add_scatter(
            x=[s],
            y=[averageVal],
            mode="markers",
            legendgroup="EEZ average",
            name="EEZ average",
            marker=dict(color="black", size=6),
        )

    fig.update_layout(annotations=annoList)

    # hide per-point legend entries, then add a single representative entry
    # (reuses s/averageVal from the last loop iteration, as before)
    fig.update_traces(showlegend=False).add_scatter(
        x=[s],
        y=[averageVal],
        mode="markers",
        legendgroup="EEZ average",
        name="EEZ average",
        marker=dict(color="black", size=6),
    )

    # legend position and axis titles
    fig.update_layout(
        legend=dict(
            x=xlegend,
            y=ylegend,
            traceorder="normal",
            font=dict(family="sans-serif", size=12, color="black"),
            bgcolor="White",
        ),
        xaxis_title=xaxis_title,
        yaxis_title=yaxis_title,
        # yaxis_range=[-20,100]
    )

    fig.show()
Code
# plot min, max, and EEZ-weighted average for targets

# The 2021 cross-section was previously stacked twice with identical code;
# build the long-format table once and filter it for each boxplot input.
stacked2021 = (
    fullData[fullData.index.isin([2021], level=1)]
    .stack()
    .reset_index()
    .rename(columns={0: "value", "level_2": "indicator"})
)

# indicator-level rows, country names abbreviated for compact axis labels
# (.copy() avoids SettingWithCopyWarning on the Country assignment below)
indicatorsBox = stacked2021[stacked2021.indicator.isin(varNames)].copy()
indicatorsBox.Country = indicatorsBox.Country.map(country_to_abbrev)

# target-level rows keep full country names
targetsBox = stacked2021[stacked2021.indicator.isin(list(indicatorsL.keys()))].copy()

# Draw both boxplots: indicator-level and target-level (all-keyword calls).
plotBox(
    df=indicatorsBox,
    xaxis_title="Indicators",
    yaxis_title="Percentage",
    xlegend=0.85,
    ylegend=0.2,
    maxShift=30,
    minShift=-10,
)
plotBox(
    df=targetsBox,
    xaxis_title="Targets",
    yaxis_title="Percentage",
    xlegend=0,
    ylegend=1,
    maxShift=10,
    minShift=-10,
)
Code
# plot comparing strong and weak sustainability
df = fullData.reset_index()

# translate scores into within-year rankings (1 = best); the Monte Carlo
# standard deviation is rescaled from score units to rank units
countryCount = len(df["Country"].unique())
df["rankMean"] = df.groupby("Year")["scoreMean"].rank(ascending=False)
df["rankStd"] = df["scoreStd"] * countryCount / 100
df["rankWeak"] = df.groupby("Year")["scoreWeak"].rank(ascending=False)

for year in [2012, 2016, 2021]:
    # weak vs strong ranking for this year, with MC uncertainty as error bars
    yearDF = df[df.Year == year]
    fig = px.scatter(
        yearDF,
        x="rankWeak",
        y="rankMean",
        text="Country",
        title=str(year),
        error_y=np.array(yearDF["rankStd"]),
    )
    fig.update_traces(
        textposition="bottom right",
        marker=dict(color="black", size=0),
        error_y=dict(width=0),
    )
    # 45-degree reference line in paper coordinates, drawn below the data
    diagonal = {
        "type": "line",
        "yref": "paper",
        "xref": "paper",
        "y0": 0,
        "y1": 1,
        "x0": 0,
        "x1": 1,
        "line": {"color": "grey", "width": 2},
        "layer": "below",
    }
    fig.update_layout(shapes=[diagonal])
    # identical square axis ranges so the paper-space diagonal really is 45 degrees
    axisMax = len(df["Country"].unique()) + 3
    fig.update_layout(
        xaxis_title=r"$\text{Ranking with unlimited substitution possibilities} \quad \sigma= \infty$",
        yaxis_title=r"$\text{Ranking with limited substitution possibilities} \quad \sigma=\text{U(0,1)}$",
        font=dict(family="sans-serif"),
        autosize=False,
        width=800,
        height=800,
        yaxis=dict(range=[0, axisMax]),
        xaxis=dict(range=[0, axisMax]),
    )
    fig.show()
Code
# LaTeX-ready table: scores and ranks under both sustainability concepts.
tableLatex = df[
    [
        "Country",
        "Year",
        "scoreMean",
        "scoreStd",
        "rankMean",
        "rankStd",
        "scoreWeak",
        "rankWeak",
    ]
]

# reshape to countries x (Year, Value) columns
tableLatex = tableLatex.set_index(["Country", "Year"]).stack().unstack([1, 2])
tableLatex = tableLatex.rename_axis(["Year", "Value"], axis=1)

# column order: strong scores, strong ranks, then weak score/rank, year by year
yearList = [2012, 2016, 2021]
columnOrder = (
    [(y, v) for y in yearList for v in ("scoreMean", "scoreStd")]
    + [(y, v) for y in yearList for v in ("rankMean", "rankStd")]
    + [(y, v) for y in yearList for v in ("scoreWeak", "rankWeak")]
)
tableLatex = tableLatex[columnOrder]

conceptL = ["Concept of Strong Sustainability ( σ ∼ U (0,1), N = 10,000)"] * 12 + [
    "Concept of Weak Sustainability (σ → ∞)"
] * 6

# add multiindex to tableLatex using conceptL list
# tableLatex.columns = pd.MultiIndex.from_arrays([conceptL, tableLatex.columns.levels[0],tableLatex.columns.levels[1] ],names=[["Concept", "Year", 'Value']])
tableLatex = tableLatex.sort_values(by=(2012, "rankMean"))
tableLatex = tableLatex.style.format(precision=2)
tableLatex
# print(tableLatex.to_latex(siunitx=True, hrules=True))
Year 2012 2016 2021 2012 2016 2021 2012 2016 2021
Value scoreMean scoreStd scoreMean scoreStd scoreMean scoreStd rankMean rankStd rankMean rankStd rankMean rankStd scoreWeak rankWeak scoreWeak rankWeak scoreWeak rankWeak
Country                                    
Belgium 51.97 4.94 45.70 11.89 47.11 10.90 1.00 0.74 6.00 1.78 2.00 1.63 59.02 2.00 63.11 2.00 62.28 1.00
Latvia 51.65 6.49 35.12 13.28 37.37 11.21 2.00 0.97 13.00 1.99 9.00 1.68 60.85 1.00 59.77 6.00 58.07 8.00
Estonia 48.15 6.25 48.18 7.06 49.01 9.10 3.00 0.94 4.00 1.06 1.00 1.36 58.80 3.00 59.52 7.00 61.64 2.00
Netherlands 47.96 7.10 39.85 7.95 41.91 8.32 4.00 1.07 10.00 1.19 7.00 1.25 56.70 8.00 52.88 14.00 55.45 12.00
Germany 47.18 6.26 44.05 11.00 43.94 11.05 5.00 0.94 8.00 1.65 6.00 1.66 57.47 5.00 60.08 5.00 60.59 3.00
Finland 47.05 6.92 52.22 4.86 45.42 9.20 6.00 1.04 3.00 0.73 4.00 1.38 57.12 6.00 59.41 8.00 57.67 9.00
Poland 46.31 7.93 33.88 13.69 32.64 12.76 7.00 1.19 14.00 2.05 13.00 1.91 58.44 4.00 58.95 9.00 56.13 11.00
Sweden 46.17 6.74 53.91 6.16 45.82 9.18 8.00 1.01 2.00 0.92 3.00 1.38 56.94 7.00 61.95 3.00 58.62 7.00
Ireland 46.04 5.10 0.00 0.00 0.00 0.00 9.00 0.76 15.00 0.00 15.00 0.00 54.31 13.00 47.91 15.00 49.01 15.00
Portugal 44.90 5.74 54.46 6.84 41.00 11.05 10.00 0.86 1.00 1.03 8.00 1.66 54.73 12.00 63.90 1.00 57.12 10.00
Lithuania 43.82 7.01 46.65 8.75 44.41 9.54 11.00 1.05 5.00 1.31 5.00 1.43 54.19 14.00 60.89 4.00 59.54 5.00
France 42.33 8.15 42.27 9.57 33.66 14.65 12.00 1.22 9.00 1.44 10.00 2.20 56.12 10.00 57.37 10.00 60.53 4.00
Denmark 39.93 9.36 35.25 12.73 32.88 15.03 13.00 1.40 12.00 1.91 11.00 2.25 56.09 11.00 56.38 12.00 58.78 6.00
United Kingdom 37.50 12.06 36.29 9.96 32.67 10.42 14.00 1.81 11.00 1.49 12.00 1.56 56.40 9.00 55.81 13.00 54.58 13.00
Spain 32.65 9.39 45.59 8.21 30.44 12.02 15.00 1.41 7.00 1.23 14.00 1.80 51.80 15.00 57.25 11.00 52.63 14.00

Trash bin

2. Carbon emissions per capita

Several sources (see code)

Code
# These data SHALL NOT BE USED. See reason on why ENV_AIR_GGE is preferable for the calculation:
# (https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Greenhouse_gas_emission_statistics_-_emission_inventories)
# (https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Greenhouse_gas_emission_statistics_-_air_emissions_accounts&oldid=551152#Greenhouse_gas_emissions)
# CO2, KG_HAB, Eurostat
# https://ec.europa.eu/eurostat/databrowser/view/ENV_AC_AINAH_R2/

# co2 = pd.read_csv('../data/env_ac_ainah_r2.csv')
# co2 = co2[co2['geo'].isin(countryAbb)]
# co2['geo'] = co2['geo'].map(abbrev_to_country).fillna(co2['geo'])
# co2 = co2.pivot_table(columns='TIME_PERIOD', index='geo', values='OBS_VALUE', aggfunc='mean')

# mr = co2.columns[-1] # most recent year
# # co2[[2012, 2016, mr]]/1000

1. Greenhouse gas emissions under the Effort Sharing Decision (ESD)

Several sources (see code)

2. Carbon emissions per capita

Several sources (see code)

Code
# # CO2, TOTX4_MEMONIA, THS_T thousand tonnes, Eurostat
# # https://ec.europa.eu/eurostat/databrowser/view/ENV_AIR_GGE/

# co2 = pd.read_csv("../data/env_air_gge.csv")
# co2["geo"] = co2["geo"].map(abbrev_to_country).fillna(co2["geo"])
# co2 = co2[
#     co2["geo"].isin(countries)
#     & (co2.airpol == "CO2")
#     & (co2.src_crf == "TOTX4_MEMONIA")
# ]
# co2 = co2.pivot_table(
#     columns="TIME_PERIOD", index="geo", values="OBS_VALUE", aggfunc="mean"
# )

# mr = co2.columns[-1]  # most recent year
# co2pc = co2 / pop * 1000  ## tonnes co2 per capita
# co2pc.dropna(how="all", inplace=True)
# # maxMin transformation as (max-x)/(max-min) * 100 --> converts to 0-100 scale with 0=worst, 100=best
# co2pc = (
#     (co2pc.loc[:, 2012:mr].max().max() - co2pc.loc[:, 2012:mr])
#     / (co2pc.loc[:, 2012:mr].max().max() - co2pc.loc[:, 2012:mr].min().min())
#     * 100
# ).round(2)

# co2pc[[2012, 2016, mr]]

# missingCountries(co2pc)
Code
# # Greenhouse gas emissions under the Effort Sharing Decision (ESD), Million tonnes CO2 equivalent (Mt CO2 eq), European Environment Agency
# # https://www.eea.europa.eu/data-and-maps/data/esd-4

# ghgESD = pd.read_excel("../data/EEA_ESD-GHG_2022.xlsx", sheet_name=1, skiprows=11)
# ghgESD = ghgESD[ghgESD["Geographic entity"].isin(countries)]
# ghgESD = ghgESD.set_index("Geographic entity")
# ghgESD = ghgESD.dropna(axis=1, how="all")
# negativeValues(ghgESD)
# mr = ghgESD.columns[-1]  # most recent year
# ghgESD = ghgESD * 1_000_000
Code
# # Allocation for 2020 target
# # https://ec.europa.eu/clima/ets/esdAllocations.do?languageCode=en&esdRegistry=-1&esdYear=&search=Search&currentSortSettings=
# allocation2020 = pd.read_xml(
#     "../data/esdAllocations2020.xml", xpath=".//ESDAllocations/ESDAllocationInfo"
# )
# allocation2020["Country"] = (
#     allocation2020["ESDMemberState"]
#     .map(abbrev_to_country)
#     .fillna(allocation2020["ESDMemberState"])
# )
# allocation2020 = allocation2020[allocation2020["Country"].isin(countries)]
# allocation2020 = allocation2020.pivot_table(
#     columns="ESDYear", index="Country", values="Allocation", aggfunc="mean"
# )

# # Allocation for 2030 target
# # https://eur-lex.europa.eu/legal-content/EN/TXT/HTML/?uri=CELEX:32020D2126
# allocation2030 = pd.read_html("../data/esdAllocations2030.html")[13][1:]
# allocation2030.columns = allocation2030.iloc[0]
# allocation2030 = allocation2030[1:]
# allocation2030.loc[
#     allocation2030["Member State"].str.contains("Netherlands"), "Member State"
# ] = "Netherlands"
# allocation2030 = allocation2030[allocation2030["Member State"].isin(countries)]
# allocation2030.set_index("Member State", inplace=True)
# for col in allocation2030.columns:
#     allocation2030[col] = allocation2030[col].apply(
#         lambda x: str(x).replace("\xa0", "")
#     )
# allocation2030 = allocation2030.astype(int)

# # Merge 2005 values with 2020 and 2030 allocations. Interpolate for years 2006-2012
# allocationESD = ghgESD[[2005]].merge(
#     allocation2020.merge(
#         allocation2030, left_index=True, right_index=True, how="outer"
#     ),
#     left_index=True,
#     right_index=True,
#     how="outer",
# )
# allocationESD.columns = allocationESD.columns.astype(int)
# allocationESD[list(range(2006, 2013))] = np.nan  # create empty columns for 2006-2012
# allocationESD = allocationESD[list(range(2005, 2031))]
# allocationESD.interpolate(axis=1, inplace=True)

# # Calculate score for ESD
# scoreESD = 100 - (100 * (ghgESD - allocationESD) / allocationESD)
# scoreESD[scoreESD > 100] = 100
# scoreESD = scoreESD.ffill(axis=1)  # fill empty values with last available year
# scoreESD[[2013, 2016, 2021]]
# missingCountries(scoreESD)

Alternative (dif-ref) score for ESD (measuring this way does not make a lot of sense to me)

Code
# # Alternative (dif-ref) score for ESD (measuring this way does not make a lot of sense to me)

# scoreESD2020 = 100 - 100 * (
#     ghgESD.loc[:, :2020].subtract(allocationESD[2020], axis=0)
# ).divide(allocationESD[2020], axis=0)
# scoreESD2030 = 100 - 100 * (
#     ghgESD.loc[:, 2021:].subtract(allocationESD[2030], axis=0)
# ).divide(allocationESD[2030], axis=0)
# scoreESD1 = scoreESD2020.merge(scoreESD2030, left_index=True, right_index=True)
# scoreESD1[scoreESD1 > 100] = 100
# # scoreESD1[[2012, 2018, 2021]]
Code
# # Effort sharing regulation
# # Get the targets for 2020 and 2030 in percentage
# # Member State greenhouse gas emission limits in 2020 and 2030 compared to 2005 greenhouse gas emissions levels
# # There are targets for 2020 and for 2030
# # https://www.eea.europa.eu/data-and-maps/figures/national-progress-towards-greenhouse-gas
# # Official journals with the data can be found at (https://climate.ec.europa.eu/eu-action/effort-sharing-member-states-emission-targets_en)

# limitESR = pd.read_excel('../data/targetsESR/FIG2-154307-CLIM058-v2-Data.xlsx', sheet_name=1, skiprows=19, header=1, skipfooter=32)
# limitESR = limitESR.rename(columns={'Unnamed: 0':'Country', '(Resulting) ESR target 2030 (AR4)':'2030ESRtarget','ESR limit for 2020':'2020ESRtarget', '2005 ESD BJ':'2005Level'})
# limitESR = limitESR[['Country', '2020ESRtarget','2030ESRtarget','2005Level']]
# limitESR.set_index('Country', inplace=True)
# limitESR = limitESR[limitESR.index.isin(countries)]
# #UK is not in the dataset, we need to add from the official journal
# limitESR
# missingCountries(limitESR)

1. Fishing Subsidies relative to Landings

Several sources

Code
# # Landing data for Fishing Subsidies

# # load oecd abbreviation list
# with open("../data/oecdAbb.csv") as csv_file:
#     reader = csv.reader(csv_file)
#     oecdAbb = dict(reader)

# # Landings in USD
# # https://data.oecd.org/fish/fish-landings.htm

# landing = pd.read_csv("../data/fishLandingsOECD.csv")
# landing["LOCATION"] = landing["LOCATION"].map(oecdAbb).fillna(landing["LOCATION"])
# landing = landing[
#     landing["LOCATION"].isin(countries)
#     & (landing["MEASURE"] == "USD")
#     & (landing["SUBJECT"] == "TOT")
# ]
# # landing.pivot_table(columns='TIME', index='LOCATION', values='Value', aggfunc='mean')
# # landing = landing[list(range(2010, 2021))]
# landing.rename(
#     columns={"LOCATION": "Country", "TIME": "Year", "Value": "Landing"}, inplace=True
# )
# landing.set_index(["Country", "Year"], inplace=True)


# # merge subsidies-landings and calculate score
# fseLanding = pd.merge(
#     fseRisk,
#     landing,
#     how="left",
#     left_on=["Country", "Year"],
#     right_on=["Country", "Year"],
# )
# fseLanding["fseLanding"] = fseLanding.Value / fseLanding.Landing
# fseLanding = fseLanding.pivot_table(
#     columns="Year", index="Country", values="fseLanding", aggfunc="mean"
# )
# mr = fseLanding.columns[-1]  # most recent year
# # Poland is an outlier
# # fseLanding = fseLanding[~fseLanding.index.str.contains("Poland")]
# # maxMin transformation as (max-x)/(max-min) * 100 --> converts to 0-100 scale with 0=worst, 100=best

# fseScore = (
#     (fseLanding.loc[:, 2012:mr].max().max() - fseLanding.loc[:, 2012:mr])
#     / (fseLanding.loc[:, 2012:mr].max().max() - fseLanding.loc[:, 2012:mr].min().min())
#     * 100
# ).round(2)
# # fseScore = fseScore.ffill(axis=1)  # fill empty values with last available year
# fseScore[[2012, 2016, mr]]
# missingCountries(fseScore)
Code
# From 14.6, using Eurostat data for landing values.

# Even though the USD-EUR discrepancy does not affect the ratio we calculate,
# we get today's exchange rate to convert landing values to USD and have a consistent unit
# Solution source: https://stackoverflow.com/a/17250702/14534411


# r = requests.get('http://www.ecb.int/stats/eurofxref/eurofxref-daily.xml', stream=True)
# tree = ET.parse(r.raw)
# root = tree.getroot()
# namespaces = {'ex': 'http://www.ecb.int/vocabulary/2002-08-01/eurofxref'}
# currency = 'USD'
# match = root.find('.//ex:Cube[@currency="{}"]'.format(currency.upper()), namespaces=namespaces)
# eurTOusd = float(match.attrib['rate'])

# Landings of fishery products, TOTAL FISHERY PRODUCTS, Euro, Eurostat
# https://ec.europa.eu/eurostat/databrowser/view/FISH_LD_MAIN/

# landing = pd.read_csv('../data/fish_ld_main.csv')
# landing['Country'] = landing.geo.map(abbrev_to_country).fillna(landing.geo)
# landing = landing[landing.Country.isin(countries)]
# landing = landing[['Country', 'TIME_PERIOD', 'OBS_VALUE', 'unit']]
# landing['landingUSD'] = landing.OBS_VALUE * eurTOusd
# landing.pivot_table(columns='TIME_PERIOD', index='Country', values='landingUSD', aggfunc='mean')
Code
# Alternative dataset for fisheries subsidies

# https://www.sciencedirect.com/science/article/abs/pii/S0308597X16000026#bib4


# sumaila2009 = pd.read_excel("../data/subsidies2016Sumaila.xlsx")
# sumaila2009["Countries"] = sumaila2009["Countries"].str.strip()
# sumaila2009 = sumaila2009[
#     sumaila2009["Countries"].isin(countries) | sumaila2009["Countries"].isin(countryAbb)
# ]
# sumaila2009.Countries = sumaila2009.Countries.map(abbrev_to_country).fillna(
#     sumaila2009.Countries
# )
# sumaila2009 = sumaila2009[["Countries", "ReEst_Subsidy2009", "SubType"]]
# sumaila2009 = sumaila2009.groupby(["Countries"]).sum(numeric_only=True)
# # sumaila2009

# missingCountries(sumaila2009)

# sumaila2018 = pd.read_csv("../data/subsidies2019Sumaila.csv")
# sumaila2018 = sumaila2018[["Country", "Constant 2018 USD", "Type"]]
# sumaila2018 = sumaila2018[sumaila2018.Country.isin(countries)]
# sumaila2018 = sumaila2018.groupby(["Country"]).sum(numeric_only=True)
# # sumaila2018

# missingCountries(sumaila2018)

OHI Habitat subgoal (Biodiversity)

Code
# # habitat subgoal
# # https://github.com/OHI-Science/ohi-global/tree/draft/yearly_results/global2022/Results/data
# years = [2012, 2016, 2022]
# habitatOHI = pd.DataFrame()
# for year in years:
#     tempDF = pd.read_csv("../data/habitatOHI/scores_eez_{0}.csv".format(year))
#     tempDF["year"] = year
#     habitatOHI = pd.concat([habitatOHI, tempDF])
# habitatOHI = habitatOHI[habitatOHI.region_name.isin(countries)][
#     ["region_name", "year", "HAB"]
# ]
# habitatOHI = habitatOHI.pivot_table(index="region_name", columns="year", values="HAB")
# habitatOHI
# missingCountries(habitatOHI)

EEZ as per eea.europa.eu

Code
# EEZ as per the EEA (France does not include outermost regions)
# https://www.eea.europa.eu/data-and-maps/daviz/n2k-cover-in-national-marine-waters/#tab-chart_2

# eezEEA = pd.read_csv("../data/n2k-cover-in-national-marine-waters.csv")
# eezEEA["eezEEA"] = (
#     eezEEA["Natura 2000 surface area:number"]
#     / eezEEA["Natura 2000 cover in national waters:number"]
#     * 100
# )
# eezEEA.rename(columns={"Country:text": "Country"}, inplace=True)
# eezEEA.set_index("Country", inplace=True)
# eezEEA = eezEEA[(eezEEA.index.isin(countries))]
# eezEEA.eezEEA.astype(int)

MPAs as defined by IUCN

Code
# iso3 = pd.read_csv("../data/iso3.csv")
# iso3 = iso3[iso3.name.isin(countries + outermost)]
# iso3Countries = list(iso3.iso3)
# dictISO3 = iso3.set_index("iso3").to_dict()["name"]

# # Protected Planet WDPA data
# # https://www.protectedplanet.net/en/thematic-areas/wdpa?tab=WDPA
# # Metadata https://wdpa.s3-eu-west-1.amazonaws.com/WDPA_Manual/English/WDPA_WDOECM_Manual_1_6.pdf
# wdpa = pd.read_csv("../data/WDPA_Jul2023_Public_csv/WDPA_Jul2023_Public_csv.csv")
Code
# wdpaDF = wdpa[
#     (wdpa.MARINE.isin([1, 2]))
#     & (wdpa.PA_DEF == 1)
#     & (wdpa.ISO3.isin(iso3Countries + ["BLM;GLP;MAF;MTQ"]))
#     & (wdpa.STATUS.isin(["Designated"]))
#     # & (wdpa.DESIG_ENG.str.contains('Directive')) # Check for Natura 2000 sites
# ]
# mpaProtected = pd.DataFrame()
# for year in [2012, 2016, 2022]:
#     tempDF = (
#         wdpaDF[wdpaDF.STATUS_YR <= year].groupby("PARENT_ISO3").sum(numeric_only=True)
#     )
#     tempDF["year"] = year
#     mpaProtected = pd.concat([mpaProtected, tempDF])
# mpaProtected.index = mpaProtected.index.map(dictISO3)
# mpaProtected = mpaProtected.merge(eez, left_index=True, right_index=True)
# mpaProtected["mpaEEZ"] = mpaProtected.REP_M_AREA / mpaProtected.Area_km2 * 100
# mpaProtected["Score"] = 100 * (mpaProtected.mpaEEZ) / 30
# mpaProtected.loc[mpaProtected["Score"] > 100, "Score"] = 100
# mpaProtected = mpaProtected.pivot(columns="year", values="Score")
# mpaProtected
# missingCountries(mpaProtected)
Code
# # Old MPA data (similar results to Natura2k, even though they use the Protected Planet data)
# # Marine protected areas (% of territorial waters), World Bank aggregation of https://www.protectedplanet.net/en/thematic-areas/marine-protected-areas
# # https://data.worldbank.org/indicator/ER.MRN.PTMR.ZS

# mpa = pd.read_csv("../data/API_ER.MRN.PTMR.ZS_DS2.csv", skiprows=4)
# mpa = mpa[mpa["Country Name"].isin(countries)].set_index("Country Name")
# mpa = mpa.dropna(axis=1, how="all")
# mpa = mpa.drop(["Country Code", "Indicator Name", "Indicator Code"], axis=1)
# # display all rows
# negativeValues(mpa)
# mpa = (mpa / 0.3).round(2)  # dis-ref with target 30%
# mpa[mpa > 100] = 100
# mpa.sort_index(inplace=True)
# mpa[["2016", "2021"]]
# missingCountries(mpa)

Measures under the Marine Strategy Framework Directive (DROPPED)

Code
# The barplot here: https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=CELEX:52018DC0562&from=EN
# Comes from https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=CELEX:52018SC0393

# The analogous report for 2020 is here https://environment.ec.europa.eu/system/files/2023-04/C_2023_2203_F1_COMMUNICATION_FROM_COMMISSION_EN_V5_P1_2532109.PDF
# But the assessment is much shorter. They refer the reader to a JRC report:
# https://publications.jrc.ec.europa.eu/repository/handle/JRC129363
# That report assesses all the descriptors, but it cannot be compared to the previous assessment.
# Moreover, the source code and data are not available.

# Overall, it is hard to make an indicator for the measures taken against pressure indicators by the MS.
# Countries report different measures and data is poor.